### Generate slices

- This notebook takes the train and test COCO JSON files generated in notebook 2 and generates tile images plus tile-level COCO files.

In [4]:

import logging
import os
from pathlib import Path
from typing import Dict, List, Optional, Sequence, Tuple, Union

import numpy as np
from PIL import Image
from tqdm import tqdm

## SAHI is dependency. Install by running $ pip install sahi
from shapely.errors import TopologicalError

from sahi.annotation import BoundingBox, Mask
from sahi.utils.coco import Coco, CocoAnnotation, CocoImage, create_coco_dict
from sahi.utils.cv import IMAGE_EXTENSIONS_LOSSLESS, IMAGE_EXTENSIONS_LOSSY, read_image_as_pil
from sahi.utils.file import load_json, save_json

from sahi.slicing import slice_image
from sahi.slicing import *

In [2]:
def _slice_coco(
    coco_annotation_file_path: str,
    image_dir: str,
    output_coco_annotation_file_name: str,
    output_dir: Optional[str] = None,
    ignore_negative_samples: bool = False,
    slice_height: int = 512,
    slice_width: int = 512,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    min_area_ratio: float = 0.1,
    out_ext: Optional[str] = None,
    verbose: bool = False,
) -> Tuple[Dict, Union[str, Path]]:
    """
    Slice every image listed in a COCO annotation file into smaller windows and
    build a COCO dict describing the resulting tiles.

    Each image is passed to ``sahi.slicing.slice_image`` together with its
    annotations. The per-image output name is the image file stem followed by
    ``_<index>``, where ``<index>`` is the position of the image in the COCO
    file (presumably to keep tile names unique when two images share a stem --
    TODO confirm).

    Images whose slicing raises an exception are logged and skipped; they do
    not abort the run and contribute no tiles to the result.

    Args:
        coco_annotation_file_path (str): Location of the coco annotation file
        image_dir (str): Base directory for the images
        output_coco_annotation_file_name (str): File name (without the
            ``_coco.json`` suffix) of the exported coco dataset json.
        output_dir (str, optional): Output directory. It is forwarded to
            ``slice_image`` and is also where the coco json is written.
        ignore_negative_samples (bool): If True, images without annotations
            are ignored. Defaults to False.
        slice_height (int): Height of each slice. Default 512.
        slice_width (int): Width of each slice. Default 512.
        overlap_height_ratio (float): Fractional overlap in height of each
            slice (e.g. an overlap of 0.2 for a slice of size 100 yields an
            overlap of 20 pixels). Default 0.2.
        overlap_width_ratio (float): Fractional overlap in width of each
            slice (e.g. an overlap of 0.2 for a slice of size 100 yields an
            overlap of 20 pixels). Default 0.2.
        min_area_ratio (float): If the cropped annotation area to original annotation
            ratio is smaller than this value, the annotation is filtered out. Default 0.1.
        out_ext (str, optional): Extension of saved images. Default is the
            original suffix.
        verbose (bool, optional): Switch to print relevant values to screen.
            Default 'False'.

    Returns:
        coco_dict: dict
            COCO dict for sliced images and annotations
        save_path: pathlib.Path or str
            Path of the saved coco file
            (``<output_dir>/<output_coco_annotation_file_name>_coco.json``).
            It is the empty string ``""`` when nothing was saved, i.e. when
            ``output_coco_annotation_file_name`` or ``output_dir`` is empty/None.
    """
    # Fetch the logger by name instead of relying on `from sahi.slicing import *`
    # to leak a `logger` global into the notebook. The name matches the logger
    # that this function's messages were emitted under, so output is unchanged.
    log = logging.getLogger("sahi.slicing")

    # read coco file
    coco_dict: Dict = load_json(coco_annotation_file_path)
    # index the annotations per image
    coco = Coco.from_coco_dict_or_path(coco_dict)
    # tile-level CocoImage objects collected across all source images
    sliced_coco_images: List = []

    # iterate over images and slice
    for idx, coco_image in enumerate(tqdm(coco.images)):
        image_path: str = os.path.join(image_dir, coco_image.file_name)
        try:
            slice_image_result = slice_image(
                image=image_path,
                coco_annotation_list=coco_image.annotations,
                # stem + enumeration index is the base name handed to slice_image
                output_file_name=f"{Path(coco_image.file_name).stem}_{idx}",
                output_dir=output_dir,
                slice_height=slice_height,
                slice_width=slice_width,
                overlap_height_ratio=overlap_height_ratio,
                overlap_width_ratio=overlap_width_ratio,
                min_area_ratio=min_area_ratio,
                out_ext=out_ext,
                verbose=verbose,
            )
            # append slice outputs
            sliced_coco_images.extend(slice_image_result.coco_images)
        except TopologicalError:
            log.warning(f"Invalid annotation found, skipping this image: {image_path}")
        except Exception as e:
            # Deliberate best-effort: one bad image must not abort a multi-hour run.
            log.error(f"Error while slicing image {image_path}: {e}")

    # create and save coco dict
    coco_dict = create_coco_dict(
        sliced_coco_images, coco_dict["categories"], ignore_negative_samples=ignore_negative_samples
    )
    # "" is kept as the "nothing saved" sentinel for backward compatibility
    save_path: Union[str, Path] = ""
    if output_coco_annotation_file_name and output_dir:
        save_path = Path(output_dir) / (output_coco_annotation_file_name + "_coco.json")
        save_json(coco_dict, save_path)

    return coco_dict, save_path

### Generate train set tile dataset

In [6]:

# Inputs: image folder produced by notebook 1 and train COCO file produced by notebook 2
image_dir = "/media/l3404/Mate/kaza_files/kaza_export_v0/exports"
coco_annotation_file_path = "/media/l3404/Mate/kaza_files/kaza_export_v0/kaza_train.v0.json"

# Outputs: directory passed to the slicer, and the base name of the tile-level
# COCO file (saved as "<name>_coco.json" inside output_dir)
output_dir = "/mnt/space/space-home/wild-me/kaza/slice_kaza_train-25"
output_coco_annotation_file_name = "slice_kaza_train-25"

# Slice the train images into 512x512 tiles with 20% overlap in both directions
coco_dict, coco_path = _slice_coco(
    coco_annotation_file_path,
    image_dir,
    output_coco_annotation_file_name,
    output_dir=output_dir,
    slice_height=512,
    slice_width=512,
    overlap_height_ratio=0.2,
    overlap_width_ratio=0.2,
    min_area_ratio=0.25,
    verbose=False,
)

indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 4260/4260 [00:11<00:00, 362.55it/s]
  0%|          | 11/4260 [00:37<4:13:31,  3.58s/it]

In [None]:
# (intentionally empty cell -- a stray "/" here was not valid Python and would stop "Run All")

### Generate test set tile dataset (read from the `kaza_val` COCO file)

In [31]:

# Inputs: image folder produced by notebook 1 and the COCO file produced by notebook 2
image_dir = "/media/l3404/Mate/kaza_files/kaza_export_v0/exports"
coco_annotation_file_path = "/media/l3404/Mate/kaza_files/kaza_export_v0/kaza_val.v0.json"

# Outputs: directory passed to the slicer, and the base name of the tile-level
# COCO file (saved as "<name>_coco.json" inside output_dir)
output_dir = "/mnt/space/space-home/wild-me/kaza/slice_kaza_val_v0"
output_coco_annotation_file_name = "slice_kaza_val_v0"

# Baseline parameters that should work well for most problems.
# Ranges worth experimenting with:
#   - tile size:       480 to 960
#   - overlap:         0.15 to 0.4
#   - min_area_ratio:  0.1 to 0.5
coco_dict, coco_path = _slice_coco(
    coco_annotation_file_path,
    image_dir,
    output_coco_annotation_file_name,
    output_dir=output_dir,
    slice_height=512,
    slice_width=512,
    overlap_height_ratio=0.2,
    overlap_width_ratio=0.2,
    min_area_ratio=0.25,
    verbose=False,
)

indexing coco dataset annotations...




Loading coco annotations: 100%|██████████| 1065/1065 [00:03<00:00, 275.08it/s]
 25%|██▌       | 270/1065 [17:26<44:42,  3.37s/it]  12/26/2023 14:33:32 - ERROR - sahi.slicing -   Error while slicing image /media/l3404/43b317c3-b94d-4e83-9199-c98069ecabfc/kaza_files/kaza_export_v0/exports/KES22_LJB-R_20220908A-083515_M0909077.jpg: not enough values to unpack (expected 4, got 0)
 50%|█████     | 535/1065 [34:37<36:11,  4.10s/it]12/26/2023 14:50:44 - ERROR - sahi.slicing -   Error while slicing image /media/l3404/43b317c3-b94d-4e83-9199-c98069ecabfc/kaza_files/kaza_export_v0/exports/KES22_WOT-R_20220923B-141727_M0702875.jpg: not enough values to unpack (expected 4, got 0)
 74%|███████▎  | 783/1065 [50:57<17:46,  3.78s/it]12/26/2023 15:07:03 - ERROR - sahi.slicing -   Error while slicing image /media/l3404/43b317c3-b94d-4e83-9199-c98069ecabfc/kaza_files/kaza_export_v0/exports/KES22_IIM-L_20220924A-075842_M0306624.jpg: not enough values to unpack (expected 4, got 0)
100%|██████████| 1065/106