In [1]:
!pip install albumentations

Collecting albumentations
  Using cached albumentations-1.4.7-py3-none-any.whl.metadata (37 kB)
Collecting scikit-image>=0.21.0 (from albumentations)
  Using cached scikit_image-0.23.2-cp310-cp310-win_amd64.whl.metadata (14 kB)
Collecting scikit-learn>=1.3.2 (from albumentations)
  Using cached scikit_learn-1.4.2-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting pydantic>=2.7.0 (from albumentations)
  Using cached pydantic-2.7.1-py3-none-any.whl.metadata (107 kB)
Collecting opencv-python-headless>=4.9.0 (from albumentations)
  Using cached opencv_python_headless-4.9.0.80-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting annotated-types>=0.4.0 (from pydantic>=2.7.0->albumentations)
  Using cached annotated_types-0.6.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx>=2.8 (from scikit-image>=0.21.0->albumentations)
  Using cached networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting imageio>=2.33 (from scikit-image>=0.21.0->albumentations)
  Using cached imageio-2.34.1-py

In [2]:
!pip install torch

Collecting torch
  Using cached torch-2.3.0-cp310-cp310-win_amd64.whl.metadata (26 kB)
Collecting filelock (from torch)
  Using cached filelock-3.14.0-py3-none-any.whl.metadata (2.8 kB)
Collecting sympy (from torch)
  Using cached sympy-1.12-py3-none-any.whl.metadata (12 kB)
Collecting jinja2 (from torch)
  Using cached jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB)
Collecting fsspec (from torch)
  Using cached fsspec-2024.5.0-py3-none-any.whl.metadata (11 kB)
Collecting mkl<=2021.4.0,>=2021.1.1 (from torch)
  Using cached mkl-2021.4.0-py2.py3-none-win_amd64.whl.metadata (1.4 kB)
Collecting intel-openmp==2021.* (from mkl<=2021.4.0,>=2021.1.1->torch)
  Using cached intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl.metadata (1.2 kB)
Collecting tbb==2021.* (from mkl<=2021.4.0,>=2021.1.1->torch)
  Using cached tbb-2021.12.0-py3-none-win_amd64.whl.metadata (1.1 kB)
Collecting mpmath>=0.19 (from sympy->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Using cached torch-2

In [3]:
import os
import xml.etree.ElementTree as ET
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
# Function to parse XML files and extract bounding boxes
def parse_voc_xml(xml_file):
    """Read a Pascal VOC annotation file and collect its bounding boxes.

    Args:
        xml_file: Path (or file object) accepted by ``ET.parse``.

    Returns:
        A ``(boxes, tree)`` tuple. ``boxes`` holds one
        ``[xmin, ymin, xmax, ymax]`` list of ints per ``<object>`` element, in
        document order. ``tree`` is the parsed ``ElementTree`` so the caller
        can modify and re-write the annotation.

    Raises:
        AttributeError: If an ``<object>`` has no ``<bndbox>`` or is missing
            one of the four coordinate elements.
        ValueError: If a coordinate is not numeric.
    """
    tree = ET.parse(xml_file)

    boxes = []
    for obj in tree.getroot().findall("object"):
        bndbox = obj.find("bndbox")  # look it up once instead of per coordinate
        boxes.append(
            [
                # Go through float() first so coordinates stored as "87.5"
                # are rounded to the nearest pixel instead of raising.
                int(round(float(bndbox.find(tag).text)))
                for tag in ("xmin", "ymin", "xmax", "ymax")
            ]
        )

    return boxes, tree


# Augmentation pipeline: a single resize to 350x350. Bounding boxes are passed
# in Pascal VOC format and their labels travel in the "class_labels" field.
transform = A.Compose(
    [A.Resize(height=350, width=350)],
    bbox_params=A.BboxParams(
        format="pascal_voc",
        label_fields=["class_labels"],
    ),
)


# Function to process a single image and its XML annotation
def process_image(image_path, xml_path):
    """Run ``transform`` on one image and its VOC annotation and save both.

    The image is read with OpenCV, its boxes are taken from ``xml_path``, and
    the module-level ``transform`` is applied to both. The outputs are written
    to the input paths with ``"images"`` replaced by ``"augmented_images"``.

    Args:
        image_path: Path of the source image.
        xml_path: Path of the Pascal VOC annotation belonging to the image.

    Raises:
        OSError: If the image cannot be read or the result cannot be written.
        ValueError: If the derived output path equals the input path, which
            would overwrite the source files.
    """
    # cv2.imread reports failure by returning None rather than raising.
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(f"Could not read image: {image_path}")

    # Parse the XML file to get bounding boxes
    boxes, tree = parse_voc_xml(xml_path)
    class_labels = [0] * len(boxes)  # Replace with actual class labels if available

    # Apply the augmentation
    transformed = transform(image=image, bboxes=boxes, class_labels=class_labels)
    transformed_image = transformed["image"]
    transformed_bboxes = transformed["bboxes"]

    # NOTE(review): str.replace rewrites every occurrence of "images", so a
    # directory such as "collectedimages" becomes "collectedaugmented_images".
    # Kept as-is because downstream steps may rely on that location - confirm.
    augmented_image_path = image_path.replace("images", "augmented_images")
    augmented_xml_path = xml_path.replace("images", "augmented_images")

    # Without an "images" substring the replace is a no-op and the writes
    # below would clobber the original image and annotation.
    if augmented_image_path == image_path or augmented_xml_path == xml_path:
        raise ValueError(
            f"Output path equals input path; refusing to overwrite: {image_path}"
        )

    # Keep the annotation's <size> in sync with the transformed image.
    # Assumes the transform returns an array whose first two axes are
    # height and width, as cv2.imread does - TODO confirm if the pipeline changes.
    size = tree.getroot().find("size")
    if size is not None:
        height, width = transformed_image.shape[:2]
        for tag, value in (("width", width), ("height", height)):
            node = size.find(tag)
            if node is not None:
                node.text = str(value)

    # Create the target directory of both outputs (normally the same one).
    for out_path in (augmented_image_path, augmented_xml_path):
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # cv2.imwrite returns False when the image could not be saved.
    if not cv2.imwrite(augmented_image_path, transformed_image):
        raise OSError(f"Could not write image: {augmented_image_path}")

    create_augmented_xml(augmented_xml_path, transformed_bboxes, tree)


# Function to create a new XML file with updated bounding boxes
def create_augmented_xml(xml_path, bboxes, original_tree):
    """Write ``original_tree`` to ``xml_path`` with its object boxes replaced.

    The i-th ``<object>`` element of the tree receives the i-th entry of
    ``bboxes``. The tree is modified in place before it is written.

    Args:
        xml_path: Destination path of the new annotation file.
        bboxes: Sequence of ``(xmin, ymin, xmax, ymax)`` values, one per
            ``<object>`` element, in document order. Float values are rounded
            to the nearest integer pixel.
        original_tree: Parsed ``ElementTree`` of the source annotation.

    Raises:
        IndexError: If there are fewer boxes than ``<object>`` elements.
    """
    objects = original_tree.getroot().findall("object")

    # Check up front so a mismatch never leaves the tree half-updated.
    if len(bboxes) < len(objects):
        raise IndexError(
            f"Got {len(bboxes)} bounding boxes for {len(objects)} objects"
        )

    for member, bbox in zip(objects, bboxes):
        bndbox = member.find("bndbox")
        for tag, value in zip(("xmin", "ymin", "xmax", "ymax"), bbox):
            # Store integer pixels: writing a float such as "87.5" makes
            # int()-based VOC readers fail on the generated file.
            bndbox.find(tag).text = str(int(round(value)))

    original_tree.write(xml_path)


# Walk the collected-images tree and process every JPEG that has a VOC
# annotation with the same base name next to it.
base_dir = "Tensorflow/workspace/images/collectedimages"
for root, dirs, files in os.walk(base_dir):
    for file in files:
        # splitext only touches the final extension; replacing ".jpg" in the
        # whole path would also rewrite directory names or "a.jpg.jpg".
        stem, ext = os.path.splitext(file)
        if ext.lower() != ".jpg":  # case-insensitive so "IMG.JPG" is included
            continue

        image_path = os.path.join(root, file)
        xml_path = os.path.join(root, stem + ".xml")

        # Images without an annotation are skipped.
        if os.path.exists(xml_path):
            process_image(image_path, xml_path)