# Convert SageMaker Ground Truth annotations to Pascal VOC format

This notebook defines a utility class that converts Amazon SageMaker Ground Truth bounding box annotations to the Pascal VOC format. [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) is one of the standard dataset formats for object detection. The advantage of converting to Pascal VOC is that the data can be used smoothly outside of the SageMaker built-in algorithms, because many object detection models already ship with a Pascal VOC data loading pipeline. This saves time when preparing data loaders.

Define the variables and class mappings. `CLASS_MAPPING` must be a dictionary that maps every annotation class id (as a string, e.g. `"0"`) to its label name.

In [None]:
# Local folder that receives the generated xml files -- replace the placeholder.
destination_folder = "<local destination folder>"

# S3 key of the output manifest file from the GT job -- replace the placeholder.
manifest_file = "<output.manifest>"

# Name of the labeling job -- replace the placeholder.
job_name = "<job_name>"

# Name of the S3 bucket that holds the manifest -- replace the placeholder.
bucket = "<bucket>"

# Local folder where the images are present -- replace the placeholder.
img_folder = "<local image folder>"

# Class id (as a string) -> label name. Replace the placeholder labels and
# add one entry for every remaining class.
CLASS_MAPPING = {
    "0": "<label 1>",
    "1": "<label 2>",
}

This section defines the class to convert the Ground Truth json to voc xmls.

In [None]:
import json
import os

try:
    import xml.etree.cElementTree as ET
except ImportError:  # cElementTree was removed in Python 3.9
    import xml.etree.ElementTree as ET

import boto3


class Gt2Voc:
    """
    Encapsulates the logic to convert a Ground Truth manifest file to xml files
    that are formatted according to the Pascal VOC standard.

    One xml file is written per manifest line that carries annotations for the
    configured labeling job.
    """

    def __init__(
        self,
        destination_folder,
        manifest_s3_key,
        job_name,
        bucket,
        class_mapping,
        img_folder,
        ext=".jpg",
    ):
        """
        Parameters
        ----------
        destination_folder: Folder to which the converted xmls should be saved
        manifest_s3_key: s3 key of the manifest file
        job_name: Annotation job name; also the key under which each manifest
            line stores its annotations
        bucket: s3 bucket in which the manifest file is stored
        class_mapping: dictionary mapping class ids (as strings) to label names
        img_folder: folder in which images are present
        ext: image file extension, including the leading dot
        """
        self.s3 = boto3.resource("s3")
        self.destination_folder = destination_folder
        self.manifest_s3_key = manifest_s3_key
        self.job_name = job_name
        self.bucket = bucket
        self.class_mapping = class_mapping
        self.img_folder = img_folder
        self.ext = ext

    def _create_object_annotation(self, root, voc_labels):
        """Create the object attributes of the xml.
        Parameters
        ----------
        root: root of the xml
        voc_labels: list of annotation details (dicts with the keys
            class, left, top, right and bottom)
        Returns
        -------
        root with object details added
        """
        for voc_label in voc_labels:
            obj = ET.SubElement(root, "object")
            ET.SubElement(obj, "name").text = voc_label["class"]
            ET.SubElement(obj, "pose").text = "Unspecified"
            ET.SubElement(obj, "truncated").text = str(0)  # dummy value
            ET.SubElement(obj, "difficult").text = str(0)  # dummy value
            bbox = ET.SubElement(obj, "bndbox")
            ET.SubElement(bbox, "xmin").text = str(int(voc_label["left"]))
            ET.SubElement(bbox, "ymin").text = str(int(voc_label["top"]))
            ET.SubElement(bbox, "xmax").text = str(int(voc_label["right"]))
            ET.SubElement(bbox, "ymax").text = str(int(voc_label["bottom"]))
        return root

    def _create_root(self, file_prefix, width, height, depth=3):
        """Create the root attributes of the xml.
        Parameters
        ----------
        file_prefix: image filename without extension
        width: width of the image
        height: height of the image
        depth: number of channels of the image (defaults to 3)
        Returns
        -------
        root of the xml file
        """
        # NOTE(review): the Pascal VOC devkit appears to use `annotation`
        # (singular) as the root tag; `annotations` is kept so that existing
        # consumers of these files keep working -- confirm before changing.
        root = ET.Element("annotations")
        # Use the configured extension instead of a hard-coded ".jpg" so the
        # recorded filename matches the actual image.
        ET.SubElement(root, "filename").text = f"{file_prefix}{self.ext}"
        ET.SubElement(root, "folder").text = self.img_folder
        size = ET.SubElement(root, "size")
        ET.SubElement(size, "width").text = str(width)
        ET.SubElement(size, "height").text = str(height)
        ET.SubElement(size, "depth").text = str(depth)
        return root

    def _create_and_save_xml(self, file_prefix, width, height, voc_labels, depth=3):
        """Create the xml file from the parsed json attributes and store it in the destination folder.
        Parameters
        ----------
        file_prefix: image filename without extension
        width: width of the image
        height: height of the image
        voc_labels: list of bounding box labels with class names
        depth: number of channels of the image (defaults to 3)
        Returns
        -------
        """
        root = self._create_root(file_prefix, width, height, depth)
        root = self._create_object_annotation(root, voc_labels)
        tree = ET.ElementTree(root)
        tree.write(os.path.join(self.destination_folder, f"{file_prefix}.xml"))
        print(f"Processing completed for file: {file_prefix}{self.ext}")

    def _read_json(self, img_size, annotations):
        """Translate the annotations of one image into VOC style corner boxes.
        Parameters
        ----------
        img_size: (width, height) of the image
        annotations: bounding box annotations for all the found objects; each
            item has the keys class_id, left, top, width and height
        Returns
        -------
        width, height and the list of VOC labels
        """
        width, height = img_size

        voc_labels = []
        for item in annotations:
            class_id = str(item["class_id"])
            voc_labels.append(
                {
                    # Fall back to the raw id so that an unmapped class does
                    # not end up as an empty <name/> element.
                    "class": self.class_mapping.get(class_id, class_id),
                    "left": item["left"],
                    "top": item["top"],
                    "right": item["left"] + item["width"],
                    "bottom": item["top"] + item["height"],
                }
            )

        return width, height, voc_labels

    def _strip_extension(self, img_filename):
        """Remove the configured extension from the end of the image filename.
        Parameters
        ----------
        img_filename: image file name, without any folder part
        Returns
        -------
        the filename without the trailing extension; the filename unchanged
        when it does not end with the extension
        """
        # Only a trailing match is removed; splitting on the extension would
        # also cut names that merely contain it (e.g. "a.jpg_copy.jpg").
        if self.ext and img_filename.endswith(self.ext):
            return img_filename[: -len(self.ext)]
        return img_filename

    def _convert_line(self, line):
        """Convert a single manifest line into an xml file.
        Parameters
        ----------
        line: one non-empty json line of the manifest
        Returns
        -------
        True when an xml file was written, False when the line was skipped
        """
        line_json = json.loads(line)
        img_filename = os.path.split(line_json["source-ref"])[1]

        if self.job_name not in line_json:
            # Nothing to convert for this image; keep going with the rest of
            # the manifest instead of aborting the whole run.
            print(f"Skipping {img_filename}: no '{self.job_name}' entry in the manifest line")
            return False

        label_data = line_json[self.job_name]
        size_info = label_data["image_size"][0]
        width, height, voc_labels = self._read_json(
            (size_info["width"], size_info["height"]), label_data["annotations"]
        )
        self._create_and_save_xml(
            self._strip_extension(img_filename),
            width,
            height,
            voc_labels,
            depth=size_info.get("depth", 3),
        )
        return True

    def convert(self):
        """Public function to initiate the conversion process.

        Downloads the manifest from s3 and writes one xml file per manifest
        line into the destination folder, creating the folder when needed.
        Parameters
        ----------
        Returns
        -------
        """
        os.makedirs(self.destination_folder, exist_ok=True)

        s3_obj = self.s3.Object(self.bucket, self.manifest_s3_key)
        manifest = s3_obj.get()["Body"].read().decode("utf-8")

        for line in manifest.split("\n"):
            # strip() also discards whitespace-only lines and a trailing "\r".
            line = line.strip()
            if line:
                self._convert_line(line)

Instantiate the class and start the conversion. The conversion runs through the following steps:
1. Import the manifest file from Amazon S3.
2. Parse the manifest file.
3. Read the json part belonging to an image.
4. Parse the json and identify the properties of the image and annotation details of the objects.
5. Create the root of the xml.
6. Include the object annotations details in the xml.
7. Save the xml file to the destination folder.

In [None]:
# Hand the notebook-level settings to the converter, in constructor order.
converter = Gt2Voc(
    destination_folder,
    manifest_file,  # received as manifest_s3_key
    job_name,
    bucket,
    CLASS_MAPPING,  # received as class_mapping
    img_folder,
)

In [None]:
# Read the manifest from S3 and write the xml files into the destination folder.
converter.convert()

Once the conversion completes, one xml file is generated in the destination folder for each image. The xml file has the same name as the image file, with the image extension (`.jpg` by default; pass `ext=".png"` to the class for PNG images) replaced by `.xml`.

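The content of the xml should look like this (formatted here for readability; the generated file contains the same elements but is written compactly, without indentation).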

<pre><code>&lt;?xml version="1.0" ?&gt;
&lt;annotations&gt;
  &lt;filename&gt;test.jpg&lt;/filename&gt;
  &lt;folder&gt;images&lt;/folder&gt;
  &lt;size&gt;
    &lt;width&gt;425&lt;/width&gt;
    &lt;height&gt;700&lt;/height&gt;
    &lt;depth&gt;3&lt;/depth&gt;
  &lt;/size&gt;
  &lt;object&gt;
    &lt;name&gt;label-1&lt;/name&gt;
    &lt;pose&gt;Unspecified&lt;/pose&gt;
    &lt;truncated&gt;0&lt;/truncated&gt;
    &lt;difficult&gt;0&lt;/difficult&gt;
    &lt;bndbox&gt;
      &lt;xmin&gt;1&lt;/xmin&gt;
      &lt;ymin&gt;4&lt;/ymin&gt;
      &lt;xmax&gt;422&lt;/xmax&gt;
      &lt;ymax&gt;698&lt;/ymax&gt;
    &lt;/bndbox&gt;
  &lt;/object&gt;
&lt;/annotations&gt;</code></pre>
