In [1]:
import os
import cv2
import numpy as np
from lxml import etree
from typing import List, Dict, Tuple

In [2]:
# Shared type aliases used by the annotation helpers in this notebook.
#
# BoundingBox: a rectangle given as two (x, y) integer corner points,
# i.e. ((x1, y1), (x2, y2)).
BoundingBox = Tuple[
    Tuple[int, int],  # first corner (x, y)
    Tuple[int, int],  # second corner (x, y)
]

# AdditionalInfo: maps a label name (e.g. 'location', 'year') to the single
# bounding box that carries that label.
AdditionalInfo = Dict[str, BoundingBox]

In [3]:
# Cell 2: XML Annotation Creation Function
def create_xml_annotation(image_path: str, bounding_boxes: List[BoundingBox], additional_info: AdditionalInfo, output_dir: str) -> None:
    """Write a Pascal VOC-style XML annotation file for one image.

    The image is read only to obtain its width and height. Every entry of
    ``bounding_boxes`` becomes an ``<object>`` named ``table``; the optional
    ``'location'`` and ``'year'`` entries of ``additional_info`` become objects
    named after their key. The file is written to
    ``<output_dir>/<image stem>.xml`` and its path is printed.

    Args:
        image_path: Path of the annotated image; also stored in ``<path>``.
        bounding_boxes: Table boxes as ``((x1, y1), (x2, y2))`` pixel corners.
            The two corners may be given in any order. ``None`` is treated as
            an empty list.
        additional_info: Optional boxes keyed by ``'location'`` / ``'year'``;
            other keys are ignored. ``None`` is treated as an empty dict.
        output_dir: Directory for the XML file; created if it does not exist.

    Raises:
        ValueError: If the image cannot be read by OpenCV.
    """
    filename = os.path.basename(image_path)
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Unable to read image: {image_path}")
    height, width = img.shape[:2]

    annotation = etree.Element("annotation")
    etree.SubElement(annotation, "folder").text = "processed"
    etree.SubElement(annotation, "filename").text = filename
    etree.SubElement(annotation, "path").text = image_path

    size = etree.SubElement(annotation, "size")
    etree.SubElement(size, "width").text = str(width)
    etree.SubElement(size, "height").text = str(height)
    # cv2.imread with its default flag decodes to a 3-channel BGR image.
    etree.SubElement(size, "depth").text = "3"

    def clamp(value, upper: int) -> int:
        """Force a coordinate to an integer inside ``[0, upper]``."""
        return max(0, min(int(value), upper))

    def add_object(name: str, box: BoundingBox) -> None:
        """Append one ``<object>`` element describing ``box`` labelled ``name``."""
        # A box dragged right-to-left or bottom-to-top arrives with its corners
        # reversed, and a drag that leaves the window can report coordinates
        # outside the image. Order and clamp them so that xmin <= xmax and
        # ymin <= ymax always hold in the written file.
        xmin, xmax = sorted((clamp(box[0][0], width), clamp(box[1][0], width)))
        ymin, ymax = sorted((clamp(box[0][1], height), clamp(box[1][1], height)))

        obj = etree.SubElement(annotation, "object")
        etree.SubElement(obj, "name").text = name
        etree.SubElement(obj, "pose").text = "Unspecified"
        etree.SubElement(obj, "truncated").text = "0"
        etree.SubElement(obj, "difficult").text = "0"

        bndbox = etree.SubElement(obj, "bndbox")
        for tag, value in (("xmin", xmin), ("ymin", ymin), ("xmax", xmax), ("ymax", ymax)):
            etree.SubElement(bndbox, tag).text = str(value)

    # Add bounding boxes for tables
    for box in bounding_boxes or []:
        add_object("table", box)

    # Add location and year as separate objects if provided
    extras = additional_info or {}
    for key in ('location', 'year'):
        if key in extras:
            add_object(key, extras[key])

    # Callers that bypass annotate_images_in_directory may pass a directory
    # that does not exist yet. An empty string means the current directory.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    tree = etree.ElementTree(annotation)
    output_xml_path = os.path.join(output_dir, f"{os.path.splitext(filename)[0]}.xml")
    tree.write(output_xml_path, pretty_print=True, encoding='utf-8')
    print(f"Generated XML annotation: {output_xml_path}")

In [4]:
class ImageAnnotator:
    """Interactive OpenCV window for drawing labelled boxes on a single image.

    Workflow: press ``T``, ``L`` or ``Y`` to choose what the next box means
    (table, location or year), then drag with the left mouse button.

    * Table boxes accumulate in ``bounding_boxes``.
    * Location and year are single-valued and stored in ``additional_info``;
      drawing a new one replaces the previous one.

    Stored boxes are in full-resolution image pixels, ordered as
    ``((xmin, ymin), (xmax, ymax))`` and clamped to the image. The image is
    scaled down for display only.

    Raises:
        ValueError: From the constructor if the image cannot be read.
    """

    WINDOW_NAME = "Image"
    BOX_THICKNESS = 6   # rectangle line width, in full-resolution pixels
    KEY_POLL_MS = 20    # how long each loop iteration waits for a key press
    KEY_CTRL_Z = 26     # ASCII control code produced by Ctrl+Z
    KEY_CTRL_R = 18     # ASCII control code produced by Ctrl+R

    # BGR colours, as expected by OpenCV drawing functions.
    ANNOTATION_COLORS = {
        "table": (0, 255, 0),     # Green for table
        "location": (255, 0, 0),  # Blue for location
        "year": (0, 0, 255),      # Red for year
    }
    DEFAULT_COLOR = (255, 255, 255)  # White for unknown types (should not happen)

    def __init__(self, image_path: str):
        self.image_path = image_path
        self.image = cv2.imread(image_path)
        if self.image is None:
            raise ValueError(f"Unable to read image: {image_path}")
        self.original_image = self.image.copy()

        # Set the scaling factor for display
        self.display_scale = self.calculate_scale_factor()

        self.bounding_boxes: List[BoundingBox] = []
        self.additional_info: AdditionalInfo = {}
        self.current_box: List[Tuple[int, int]] = []
        self.annotation_type: str = "table"  # Default annotation type
        self.is_drawing = False
        self.preview_image = None
        # Annotation types in the order they were drawn, so that undo can
        # remove the most recent annotation regardless of its type.
        self._history: List[str] = []

    def calculate_scale_factor(self, max_width=1000, max_height=800):
        """Calculate the scale factor to resize the image for display.

        Returns a factor that fits the image inside ``max_width`` x
        ``max_height``; images that already fit are never enlarged.
        """
        height, width = self.image.shape[:2]
        scale = min(max_width / width, max_height / height)
        return scale if scale < 1 else 1

    def _to_image_point(self, x, y):
        """Convert display-window coordinates to clamped image coordinates."""
        height, width = self.image.shape[:2]
        img_x = min(max(int(x / self.display_scale), 0), width - 1)
        img_y = min(max(int(y / self.display_scale), 0), height - 1)
        return img_x, img_y

    def _commit_box(self, start, end):
        """Store the box spanned by ``start`` and ``end`` under the current type.

        Returns the stored box, or ``None`` when the two points span no area
        (a click without a drag), in which case nothing is recorded.
        """
        x1, y1 = start
        x2, y2 = end
        if x1 == x2 or y1 == y2:
            return None

        # Order the corners so the box is valid whichever way it was dragged.
        box = ((min(x1, x2), min(y1, y2)), (max(x1, x2), max(y1, y2)))
        kind = self.annotation_type
        if kind == "table":
            self.bounding_boxes.append(box)
        else:
            # Single-valued labels: a new box replaces the previous one, so
            # drop the older history entry to keep undo consistent.
            if kind in self.additional_info and kind in self._history:
                self._history.remove(kind)
            self.additional_info[kind] = box
        self._history.append(kind)
        self.finalize_box()
        return box

    def draw_bounding_box(self, event, x, y, flags, param):
        """Mouse callback: press starts a box, move previews it, release stores it."""
        point = self._to_image_point(x, y)

        if event == cv2.EVENT_LBUTTONDOWN:
            self.current_box = [point]
            self.is_drawing = True
            self.preview_image = None
        elif event == cv2.EVENT_MOUSEMOVE:
            if self.is_drawing and self.current_box:
                self.update_preview(point[0], point[1])
        elif event == cv2.EVENT_LBUTTONUP:
            if self.is_drawing and self.current_box:
                self._commit_box(self.current_box[0], point)
            self.is_drawing = False
            self.current_box = []
            # Drop the rubber-band preview so the committed image is shown.
            self.preview_image = None

        self.update_display()

    def update_preview(self, x, y):
        """Render the in-progress box from the drag start to ``(x, y)``."""
        img_copy = self.image.copy()
        color = self.get_color_for_annotation(self.annotation_type)
        cv2.rectangle(img_copy, self.current_box[0], (x, y), color, self.BOX_THICKNESS)
        self.preview_image = img_copy

    def finalize_box(self):
        """Repaint the working image after an annotation has been stored."""
        self.redraw_all_boxes()

    def update_display(self):
        """Show the preview (if any) or the working image, scaled for display.

        This deliberately does not call ``cv2.waitKey``: a key pressed during
        such a call would be consumed and discarded. The ``waitKey`` call in
        ``annotate`` is the only one, so it both refreshes the window and
        receives every key press.
        """
        source = self.preview_image if self.preview_image is not None else self.image
        display_image = cv2.resize(source, (0, 0), fx=self.display_scale, fy=self.display_scale)

        self.show_info(display_image)
        cv2.imshow(self.WINDOW_NAME, display_image)

    def get_color_for_annotation(self, annotation_type: str):
        """Return the BGR color associated with the annotation type."""
        return self.ANNOTATION_COLORS.get(annotation_type, self.DEFAULT_COLOR)

    def show_info(self, image):
        """Draw the file name, current mode and key bindings onto ``image``."""
        info_lines = [
            f"Image: {os.path.basename(self.image_path)}",
            f"Marking: {self.annotation_type.capitalize()}",
            "Controls:",
            "T: Table, L: Location, Y: Year",
            "Ctrl+Z: Undo, Ctrl+R: Reset",
            "C: Confirm, Q: Quit"
        ]
        for i, line in enumerate(info_lines):
            origin = (10, 20 + i*20)
            # A white outline under the black text keeps it legible on dark images.
            cv2.putText(image, line, origin,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 3, cv2.LINE_AA)
            cv2.putText(image, line, origin,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

    def undo_last_box(self):
        """Undo the most recently drawn annotation, whatever its type."""
        if self._history:
            kind = self._history.pop()
            if kind == "table":
                if self.bounding_boxes:
                    self.bounding_boxes.pop()
            else:
                self.additional_info.pop(kind, None)
        elif self.bounding_boxes:
            # Boxes that were appended by outside code have no history entry.
            self.bounding_boxes.pop()
        self.redraw_all_boxes()
        self.preview_image = None  # Clear any preview
        self.update_display()

    def clear_all_boxes(self):
        """Clear all bounding boxes."""
        self.bounding_boxes = []
        self.additional_info = {}  # Also clear additional info
        self._history = []
        self.current_box = []
        self.is_drawing = False
        self.image = self.original_image.copy()
        self.preview_image = None  # Clear any preview
        self.update_display()

    def redraw_all_boxes(self):
        """Redraw every stored annotation on a fresh copy of the original image.

        Each box is painted in the colour of its own type, and the location
        and year boxes are included, so the display matches what is returned.
        """
        self.image = self.original_image.copy()
        table_color = self.get_color_for_annotation("table")
        for box in self.bounding_boxes:
            cv2.rectangle(self.image, box[0], box[1], table_color, self.BOX_THICKNESS)
        for kind, box in self.additional_info.items():
            color = self.get_color_for_annotation(kind)
            cv2.rectangle(self.image, box[0], box[1], color, self.BOX_THICKNESS)

    def annotate(self) -> "Tuple[List[BoundingBox], AdditionalInfo]":
        """Run the interactive session until the user presses ``C`` or ``Q``.

        Keys: ``T`` / ``L`` / ``Y`` select the type of the next box, Ctrl+Z
        undoes the last annotation, Ctrl+R clears everything and returns to
        table mode. ``C`` and ``Q`` both end the session.

        Returns:
            ``(bounding_boxes, additional_info)`` as collected so far.
        """
        mode_keys = {ord('t'): "table", ord('l'): "location", ord('y'): "year"}

        cv2.namedWindow(self.WINDOW_NAME)
        cv2.setMouseCallback(self.WINDOW_NAME, self.draw_bounding_box)

        try:
            while True:
                self.update_display()
                key = cv2.waitKey(self.KEY_POLL_MS) & 0xFF

                if key in (ord('q'), ord('c')):  # Quit / Confirm
                    break
                if key in mode_keys:
                    self.annotation_type = mode_keys[key]
                elif key == self.KEY_CTRL_Z:  # Ctrl+Z for undo
                    self.undo_last_box()
                elif key == self.KEY_CTRL_R:  # Ctrl+R for reset
                    self.clear_all_boxes()
                    self.annotation_type = "table"
        finally:
            # Close the window even if the loop is interrupted by an exception.
            cv2.destroyAllWindows()

        return self.bounding_boxes, self.additional_info

In [5]:
# Cell 4: Function to Annotate Images in a Directory
def annotate_images_in_directory(directory: str, output_xml_dir: str) -> None:
    """Annotate every image in ``directory`` and write one XML file per image.

    Files are matched by extension (``.jpg``, ``.jpeg``, ``.png``, ``.bmp``,
    case-insensitive) and processed in sorted name order. For each image an
    ``ImageAnnotator`` session is run and its result is passed to
    ``create_xml_annotation``. A failure on one image is printed and the
    batch continues with the next one.

    Args:
        directory: Folder containing the images to annotate.
        output_xml_dir: Folder for the XML files; created if missing.
    """
    image_extensions = (".jpg", ".jpeg", ".png", ".bmp")
    os.makedirs(output_xml_dir, exist_ok=True)

    # os.listdir returns names in arbitrary order; sorting makes the batch
    # order stable between runs, which matters for a long manual session.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(image_extensions):
            continue
        image_path = os.path.join(directory, filename)
        try:
            annotator = ImageAnnotator(image_path)
            bounding_boxes, additional_info = annotator.annotate()
            create_xml_annotation(image_path, bounding_boxes, additional_info, output_xml_dir)
        except Exception as e:
            # Deliberate best effort: one unreadable image must not end the batch.
            print(f"Error processing {filename}: {str(e)}")

In [None]:
# Cell 5: Main Execution (can be modified as needed)
# Running this cell processes all images in data_directory: each image is
# opened in an interactive OpenCV annotation window, and one XML file per
# image is written to output_xml_directory (created if it does not exist).
# Both paths are relative, so they resolve against the kernel's working directory.
data_directory = "data/processed"  # Replace with your directory path
output_xml_directory = "data/xml"  # Directory for saving XML files
annotate_images_in_directory(data_directory, output_xml_directory)

Generated XML annotation: data/xml\DSC00914_Processed.xml
Generated XML annotation: data/xml\DSC00915_Processed.xml


In [None]:
# Cell 6: Example usage for a single image (can be modified as needed)
# The example below is wrapped in a triple-quoted string, so it does not run.
# To annotate a single image, remove the surrounding triple quotes and set
# image_path and output_dir to your own paths.
"""
image_path = "path/to/your/image.jpg"
output_dir = "path/to/output/directory"

try:
    annotator = ImageAnnotator(image_path)
    bounding_boxes, additional_info = annotator.annotate()
    create_xml_annotation(image_path, bounding_boxes, additional_info, output_dir)
    print("Annotation completed successfully!")
except Exception as e:
    print(f"Error occurred: {str(e)}")
"""