# Auto-Cropper
Given an image along with its PascalVOC format annotation (XML File), this programme crops out all the objects according to the annotation bounding boxes.

**Prerequisites:**
- Folder containing images to extract objects from
- Folder containing annotations (XML files) in PascalVOC format

**Library to install:**

pip install opencv-python

### Comment this cell out at first (run the cells below to define the function), then set the directories and run it

In [None]:
# Root folder: base directory that every path below is joined onto.
# NOTE(review): the placeholder has no leading slash, so as written it
# resolves relative to the current working directory.
root = 'home/your_root_here'

# XML labels folder **relative to root**; its files are parsed as annotations.
XML_LABELS_FOLDER = "datasets/images_nonSensitive/annotations"

# Images & output folders **relative to root**.
# Each image is looked up in image_dir under the same base name as its XML
# file; crops are written beneath <output_dir>/cropped_samples/.
image_dir = "datasets/images_nonSensitive"
output_dir = "datasets"

# Runs everything. crop_from_manual_XML is defined further down in this file,
# so that definition (and the imports) must have been executed before this call.
crop_from_manual_XML()

### Do not edit anything below

In [None]:
import os
import cv2
import xml.etree.ElementTree as ET
from xml.dom import minidom
from datetime import datetime

In [None]:
def crop_from_manual_XML():
    """
    Crop every annotated object out of its image and save it as a standalone PNG.

    Reads the module-level settings ``root``, ``XML_LABELS_FOLDER``,
    ``image_dir`` and ``output_dir``. For each ``.xml`` file in the annotation
    folder, the image with the same base name (``.png``, ``.jpg``, ``.tiff`` or
    ``.tif``, tried in that order) is loaded, and each ``<object>`` bounding
    box is cropped and written to
    ``<root>/<output_dir>/cropped_samples/autocropper_<timestamp>/`` as
    ``<xml name>_<class name>_<n>.png``, where ``n`` counts the saved crops of
    that class within the file.

    Files or objects that cannot be processed (unreadable XML, missing image,
    missing/invalid ``<name>`` or ``<bndbox>``, box with no area inside the
    image) are reported on stdout and skipped rather than aborting the run.
    """
    xml_folder_dir = os.path.join(root, XML_LABELS_FOLDER)
    crop_images_output_dir = os.path.join(
        root,
        output_dir,
        'cropped_samples',
        f'autocropper_{datetime.now().strftime("%Y_%m_%d-%H.%M.%S")}',
    )

    # makedirs also creates the intermediate 'cropped_samples' folder, which
    # does not exist on a first run (os.mkdir would raise FileNotFoundError).
    os.makedirs(crop_images_output_dir, exist_ok=True)

    # Only annotation files; stray files in the folder (e.g. .DS_Store or
    # images kept next to the labels) would otherwise crash the XML parser.
    xml_list = sorted(
        name for name in os.listdir(xml_folder_dir)
        if name.lower().endswith('.xml')
        and os.path.isfile(os.path.join(xml_folder_dir, name))
    )

    # Extensions are tried in this order; the first readable image wins.
    image_extensions = ('.png', '.jpg', '.tiff', '.tif')
    bbox_tags = ('xmin', 'ymin', 'xmax', 'ymax')

    for i, xml_file in enumerate(xml_list):
        # Get name of XML file
        xml_filename_noext = os.path.splitext(xml_file)[0]

        # Read XML file
        try:
            tree = ET.parse(os.path.join(xml_folder_dir, xml_file))
        except ET.ParseError as err:
            print(f"Skipped {xml_filename_noext} as its XML could not be parsed: {err}")
            continue
        my_root = tree.getroot()

        # Open relevant image from main dataset (cv2.imread returns None on failure)
        for ext in image_extensions:
            curr_img = cv2.imread(os.path.join(root, image_dir, f'{xml_filename_noext}{ext}'))
            if curr_img is not None:
                break
        else:
            print(f"Skipped {xml_filename_noext} as image not found or not of right format. XML label must be the same name as image!")
            continue

        img_height, img_width = curr_img.shape[:2]

        # Class name -> number of crops saved so far (for generating filenames)
        name_dict = {}
        for member in my_root.findall('object'):
            # Look elements up by tag instead of by position: the order of
            # <object> children differs between annotation tools.
            class_name = member.findtext('name')
            bndbox = member.find('bndbox')
            if not class_name or bndbox is None:
                print(f"Skipped an object in {xml_filename_noext}: missing <name> or <bndbox>.")
                continue

            try:
                # int(float(...)) also accepts coordinates written as "12.0".
                xmin, ymin, xmax, ymax = (
                    int(float(bndbox.findtext(tag))) for tag in bbox_tags
                )
            except (TypeError, ValueError):
                print(f"Skipped an object in {xml_filename_noext}: invalid bounding box coordinates.")
                continue

            # Clamp to the image: negative indices would wrap around in the
            # slice below and silently produce a wrong (or empty) crop.
            xmin = max(0, min(xmin, img_width))
            xmax = max(0, min(xmax, img_width))
            ymin = max(0, min(ymin, img_height))
            ymax = max(0, min(ymax, img_height))
            if xmax <= xmin or ymax <= ymin:
                # cv2.imwrite raises on an empty image, so skip it explicitly.
                print(f"Skipped an object in {xml_filename_noext}: bounding box has no area inside the image.")
                continue

            name_dict[class_name] = name_dict.get(class_name, 0) + 1

            # Crop image (rows are y, columns are x)
            crop_img = curr_img[ymin:ymax, xmin:xmax]

            # Choose unique filename that also reveals the classname
            filename = f"{xml_filename_noext}_{class_name}_{name_dict[class_name]}.png"

            # Save image
            cv2.imwrite(os.path.join(crop_images_output_dir, filename), crop_img)

        # Progress counter: every second file, and on the last one
        if (i + 1) % 2 == 0 or i == len(xml_list) - 1:
            print(f"{i+1}/{len(xml_list)} processed!")

    print("Success! Images cropped. See 'cropped_samples' folder")