## Dataset building

In [13]:
from typing import Any, Dict, Tuple

import numpy as np
import torch
from napari.layers import Shapes
from overrides import overrides
from torch.utils.data import Dataset
from torchvision.models.detection.transform import GeneralizedRCNNTransform
from torchvision.ops import box_convert

from pytorch_faster_rcnn_tutorial.anchor_generator import get_anchor_boxes
from pytorch_faster_rcnn_tutorial.transformations import re_normalize
from pytorch_faster_rcnn_tutorial.utils import stats_dataset
from pytorch_faster_rcnn_tutorial.viewers.dataset_viewer import (
    DatasetViewer,
    make_bbox_napari,
)

In [3]:
# Suppress all warnings: no category or module filter is given, so the
# "ignore" action applies to every warning raised after this cell runs.
# NOTE(review): `warnings`, `logging` and `sys` are not imported by the imports
# cell visible in this notebook — confirm they are imported elsewhere.
warnings.filterwarnings("ignore")

# Root logger configuration: INFO and above, written to stdout so that the
# records show up as regular cell output. `basicConfig` is a no-op when the
# root logger already has handlers, i.e. re-running this cell changes nothing.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d:%(funcName)s: %(message)s",
    level=logging.INFO,
)

# Logger named after the current module (`__name__`).
logger: logging.Logger = logging.getLogger(__name__)

In [6]:
# Root directory of the "heads" example data.
# The path is relative, so it is resolved against the kernel's current
# working directory when files are read.
data_path: pathlib.Path = pathlib.Path("src/pytorch_faster_rcnn_tutorial/data/heads")

In [7]:
# Input (image) and target (annotation) files.
# `sorted` returns new lists in ascending path order, so re-running the cell
# always yields the same ordering without mutating anything in place.
inputs: List[pathlib.Path] = sorted(get_filenames_of_path(data_path / "input"))
targets: List[pathlib.Path] = sorted(get_filenames_of_path(data_path / "target"))

# Fail early instead of building a dataset from an empty or unbalanced file
# listing. Presumably the dataset pairs inputs[i] with targets[i] — confirm
# against ObjectDetectionDataSet; if so, a missing file would shift every
# following pair.
assert inputs, f"No input files found in {data_path / 'input'}"
assert len(inputs) == len(targets), (
    f"Found {len(inputs)} input files but {len(targets)} target files in {data_path}"
)

2023-06-08 16:24:13 - INFO - utils.py:27:get_filenames_of_path: Found 20 files in src/pytorch_faster_rcnn_tutorial/data/heads/input
2023-06-08 16:24:13 - INFO - utils.py:27:get_filenames_of_path: Found 20 files in src/pytorch_faster_rcnn_tutorial/data/heads/target


In [8]:
# Label name -> integer label. The targets logged by the viewer cell below
# carry label 1 for every box, matching the single "head" entry.
mapping: Dict[str, int] = {"head": 1}

In [9]:
# Transformation pipeline handed to the dataset.

# Moves the last array axis to the front (np.moveaxis with source=-1,
# destination=0); the viewer log below shows the resulting inputs as
# (3, H, W) tensors.
move_last_axis_first = FunctionWrapperDouble(np.moveaxis, source=-1, destination=0)

# Wraps `normalize_01` (presumably rescales values into [0, 1] — confirm
# against the transformations module).
normalize = FunctionWrapperDouble(normalize_01)

transforms: ComposeDouble = ComposeDouble(
    [
        Clip(),
        # Optional augmentations, currently disabled. They would run at this
        # position in the pipeline:
        # AlbumentationWrapper(albumentation=A.HorizontalFlip(p=0.5)),
        # AlbumentationWrapper(albumentation=A.RandomScale(p=0.5, scale_limit=0.5)),
        # AlbumentationWrapper(albumentation=A.VerticalFlip(p=0.5)),
        move_last_axis_first,
        normalize,
    ]
)

In [10]:
# Build the object detection dataset from the sorted file lists, the label
# mapping and the transformation pipeline defined in the cells above.
dataset: ObjectDetectionDataSet = ObjectDetectionDataSet(
    inputs=inputs,
    targets=targets,
    mapping=mapping,
    transform=transforms,
    # presumably leaves the box coordinates in their stored format — confirm
    convert_to_format=None,
    # presumably loads samples on every access instead of caching — confirm
    use_cache=False,
)

In [11]:
# Visualize the dataset.

# Resize/normalization step as used by torchvision's detection models; the
# viewer applies it to each sample and logs both the original and the
# transformed input/target (see the output below).
transform: GeneralizedRCNNTransform = GeneralizedRCNNTransform(
    image_std=[0.229, 0.224, 0.225],
    image_mean=[0.485, 0.456, 0.406],
    max_size=1024,
    min_size=1024,
)

# Integer label -> color name used for the boxes of that label.
color_mapping: Dict[int, str] = {1: "red"}

object_detection_viewer_rcnn: ObjectDetectionViewer = ObjectDetectionViewer(
    rcnn_transform=transform,
    color_mapping=color_mapping,
    dataset=dataset,
)

2023-06-08 16:24:19 - INFO - object_detection_viewer.py:43:get_data: Input sample: 001.jpg
Shape: torch.Size([3, 710, 1024])
2023-06-08 16:24:19 - INFO - object_detection_viewer.py:50:get_data: Transformed input sample: 001.jpg
Shape: torch.Size([3, 736, 1024])
2023-06-08 16:24:19 - INFO - object_detection_viewer.py:65:get_target: Target sample: 001.json
{'boxes': tensor([[ 14, 217, 277, 531],
        [199,  81, 396, 287],
        [386,   2, 588, 247],
        [306, 251, 510, 521],
        [525, 266, 741, 554],
        [723, 116, 925, 432]]), 'labels': tensor([1, 1, 1, 1, 1, 1])}
2023-06-08 16:24:19 - INFO - object_detection_viewer.py:72:get_target: Transformed target sample: 001.json
{'boxes': tensor([[ 14., 217., 277., 531.],
        [199.,  81., 396., 287.],
        [386.,   2., 588., 247.],
        [306., 251., 510., 521.],
        [525., 266., 741., 554.],
        [723., 116., 925., 432.]]), 'labels': tensor([1, 1, 1, 1, 1, 1])}
2023-06-08 16:24:20 - INFO - object_detection_viewer

## Dataset statistics

In [14]:
# Collect statistics over the dataset without any R-CNN transform applied.
# The displayed result is a dict of tensors with keys such as "image_height",
# "image_width", "image_mean", "image_std" and "boxes_height" (see output).
stats = stats_dataset(dataset)
# Bare expression so the notebook renders the dict as the cell output.
stats

{'image_height': tensor([1000.,  710.,  440.,  565.,  570.,  417.,  412.,  333.,  450.,  533.,
          667.,  408.,  408.,  408.,  394., 1200.,  700.,  422.,  405.,  416.]),
 'image_width': tensor([1000., 1024.,  660.,  581.,  800.,  625.,  550.,  500.,  338.,  800.,
         1000.,  612.,  612.,  612.,  526., 1200., 1100.,  759.,  720.,  626.]),
 'image_mean': tensor([0.6344, 0.4143, 0.6635, 0.4409, 0.7367, 0.4340, 0.5066, 0.4894, 0.4830,
         0.5201, 0.5292, 0.4543, 0.4776, 0.5529, 0.4662, 0.4136, 0.4097, 0.5390,
         0.3745, 0.5882]),
 'image_std': tensor([0.2600, 0.2683, 0.3109, 0.2454, 0.2810, 0.3352, 0.2604, 0.3031, 0.3133,
         0.2383, 0.2960, 0.3023, 0.2607, 0.2253, 0.3302, 0.2489, 0.3026, 0.3484,
         0.2674, 0.2747]),
 'boxes_height': tensor([110, 119, 114, 113, 124, 120, 109, 118, 119, 115, 114, 117, 119, 113,
         138, 120, 115, 106, 103, 121, 115, 115, 124, 114, 144, 263, 197, 202,
         204, 216, 202,  62,  69,  57,  55,  55,  57,  67,  87,  68,  

In [15]:
# Same resize/normalization settings as used for the viewer: with
# min_size = max_size = 1024 no transformed image side exceeds 1024, and the
# inputs are normalized with the given per-channel mean/std (the transformed
# "image_mean" values in the output are centered around 0).
transform = GeneralizedRCNNTransform(
    image_std=[0.229, 0.224, 0.225],
    image_mean=[0.485, 0.456, 0.406],
    max_size=1024,
    min_size=1024,
)

# Statistics of the dataset after the transform has been applied, for
# comparison with the untransformed statistics computed before.
stats_transform = stats_dataset(dataset, transform)
# Bare expression so the notebook renders the dict as the cell output.
stats_transform

{'image_height': tensor([1024.,  736.,  704., 1024.,  736.,  704.,  768.,  704., 1024.,  704.,
          704.,  704.,  704.,  704.,  768., 1024.,  672.,  576.,  576.,  704.]),
 'image_width': tensor([1024., 1024., 1024., 1024., 1024., 1024., 1024., 1024.,  800., 1024.,
         1024., 1024., 1024., 1024., 1024., 1024., 1024., 1024., 1024., 1024.]),
 'image_mean': tensor([ 0.8202, -0.1485,  0.9195, -0.0364,  1.2597, -0.0650,  0.2541,  0.1718,
          0.1446,  0.3035,  0.3443,  0.0213,  0.1232,  0.4450,  0.0727, -0.1546,
         -0.1703,  0.3933, -0.3299,  0.5940]),
 'image_std': tensor([1.1306, 1.1575, 1.3574, 1.0315, 1.2335, 1.4332, 1.1415, 1.2923, 1.3503,
         0.9977, 1.2812, 1.2765, 1.1355, 0.9675, 1.3983, 1.1207, 1.2861, 1.5048,
         1.1773, 1.1885]),
 'boxes_height': tensor([112.6400, 121.8560, 116.7360, 115.7120, 126.9760, 122.8800, 111.6160,
         120.8320, 121.8560, 117.7600, 116.7360, 119.8080, 121.8560, 115.7120,
         141.3120, 122.8800, 117.7600, 108.5440, 1