In [None]:
import xml.etree.ElementTree as ET
from os.path import isfile
import glob
from PIL import Image
from copy import deepcopy
import numpy as np

from smart_crop import smart_crop

# Crop Images
This notebook crops each image to the largest square that fits all of the bounding boxes in that image, resizes the result to 700×700 pixels, and rescales the bounding-box annotations to match.

One limitation of this cropping technique is that the objects will (for the most part) end up at the center of the image, so a neural network trained on the cropped dataset could become biased toward always predicting that the object is at the center of the image.

In [None]:
def _rescale_bndbox(box, scale_x, scale_y, width, height, edge_margin):
    """Rescale one ``<bndbox>`` element in place to the resized image's coordinates.

    Args:
        box: XML element with ``xmin``/``ymin``/``xmax``/``ymax`` children whose
            text parses as a number.
        scale_x: Horizontal scale factor (resized width / cropped width).
        scale_y: Vertical scale factor (resized height / cropped height).
        width: Width of the resized image, in pixels.
        height: Height of the resized image, in pixels.
        edge_margin: Distance, in pixels, a coordinate is pulled inside the image
            when it lands on (or beyond) an image edge.
    """
    x_a = float(box.find("xmin").text)
    y_a = float(box.find("ymin").text)
    x_b = float(box.find("xmax").text)
    y_b = float(box.find("ymax").text)

    # Correct bbox direction: make sure min <= max on both axes.
    x_lo, x_hi = min(x_a, x_b), max(x_a, x_b)
    y_lo, y_hi = min(y_a, y_b), max(y_a, y_b)

    # Scale first and round exactly once; rounding before scaling would
    # multiply the rounding error by the scale factor.
    x_lo, x_hi = round(x_lo * scale_x), round(x_hi * scale_x)
    y_lo, y_hi = round(y_lo * scale_y), round(y_hi * scale_y)

    # Prevent the bounding boxes from being at (or outside) the edge of the image.
    if x_lo <= 0:
        x_lo = edge_margin
    if y_lo <= 0:
        y_lo = edge_margin
    if x_hi >= width:
        x_hi = width - edge_margin
    if y_hi >= height:
        y_hi = height - edge_margin

    box.find("xmin").text = str(x_lo)
    box.find("xmax").text = str(x_hi)
    box.find("ymin").text = str(y_lo)
    box.find("ymax").text = str(y_hi)


def fix_annotation_issues(p, save_p, target_size=(700, 700), edge_margin=2):
    """Crop, resize and re-annotate every ``.xml``/``.jpg`` pair found in ``p``.

    For each ``<name>.xml`` annotation in ``p`` the matching ``<name>.jpg`` is
    opened, passed through ``smart_crop`` together with a copy of the annotation
    root, resized to ``target_size``, and written to ``save_p`` next to the
    updated annotation. The ``<size>`` entry and every ``<object>/<bndbox>`` are
    rewritten for the resized image.

    Args:
        p: Directory holding the source annotations and images (a trailing
            slash is accepted but no longer required).
        save_p: Directory the cropped images and annotations are written to;
            created if it does not exist.
        target_size: ``(width, height)`` of the saved images. Defaults to
            ``(700, 700)``.
        edge_margin: Pixel margin applied to box coordinates that fall on or
            beyond an image edge. Defaults to ``2``.

    Raises:
        FileNotFoundError: If an annotation has no matching ``.jpg`` in ``p``.
    """
    import os  # local import: the notebook's import cell only pulls in os.path.isfile

    if save_p:
        os.makedirs(save_p, exist_ok=True)

    ann_paths = sorted(f for f in glob.glob(os.path.join(p, "*.xml")) if isfile(f))
    for ann_path in ann_paths:
        xml_tree = ET.parse(ann_path)
        # basename/splitext keeps dotted names intact ("a.b.xml" -> "a.b") and
        # works with any path separator.
        img_name = os.path.splitext(os.path.basename(ann_path))[0]

        # The context manager closes the source file handle; the crop and
        # resize are done while it is still open.
        with Image.open(os.path.join(p, f"{img_name}.jpg")) as raw_img:
            print("Working on: "+img_name)
            # NOTE(review): smart_crop is a project helper; presumably it returns
            # the cropped image plus the annotation root already expressed in
            # crop coordinates -- confirm against its implementation.
            new_img, new_ann = smart_crop(raw_img, deepcopy(xml_tree.getroot()))
            nw, nh = new_img.size
            new_img = new_img.resize(tuple(target_size))

        # Update width and height
        w, h = new_img.size
        size_node = new_ann.find("size")
        size_node.find("width").text = str(w)
        size_node.find("height").text = str(h)

        # Correct issues with the bounding boxes
        scale_x, scale_y = w / nw, h / nh
        for obj in new_ann.findall("object"):
            _rescale_bndbox(obj.find("bndbox"), scale_x, scale_y, w, h, edge_margin)

        # Save the updated XML file and the resized image
        ET.ElementTree(new_ann).write(os.path.join(save_p, img_name + ".xml"))
        new_img.save(os.path.join(save_p, img_name + ".jpg"))

In [None]:
# Process the train split first, then the test split, writing each into the
# matching folder under ./Cropped/.
for source_dir, output_dir in (
    ("./Split/train/", "./Cropped/train/"),
    ("./Split/test/", "./Cropped/test/"),
):
    fix_annotation_issues(source_dir, output_dir)