## Dataset building

In [3]:
import logging
import pathlib
import sys
import warnings
from typing import Dict, List

import numpy as np
from torchvision.models.detection.transform import GeneralizedRCNNTransform

from src.pytorch_faster_rcnn_tutorial.datasets import ObjectDetectionDataSet
from src.pytorch_faster_rcnn_tutorial.transformations import (
    Clip,
    ComposeDouble,
    FunctionWrapperDouble,
    normalize_01,
)
from src.pytorch_faster_rcnn_tutorial.utils import get_filenames_of_path, stats_dataset
from src.pytorch_faster_rcnn_tutorial.viewers.object_detection_viewer import (
    ObjectDetectionViewer,
)

In [4]:
warnings.filterwarnings("ignore")

logger: logging.Logger = logging.getLogger(__name__)

# logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d:%(funcName)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[logging.StreamHandler(sys.stdout)],
)

In [5]:
# root directory
data_path = pathlib.Path("src/pytorch_faster_rcnn_tutorial/data/heads")

In [6]:
# input and target files
inputs: List[pathlib.Path] = get_filenames_of_path(data_path / "input")
targets: List[pathlib.Path] = get_filenames_of_path(data_path / "target")

inputs.sort()
targets.sort()

2024-02-27 17:27:11 - INFO - utils.py:27:get_filenames_of_path: Found 20 files in src\pytorch_faster_rcnn_tutorial\data\heads\input
2024-02-27 17:27:11 - INFO - utils.py:27:get_filenames_of_path: Found 19 files in src\pytorch_faster_rcnn_tutorial\data\heads\target


In [7]:
# mapping
mapping = {
    "head": 1,
}

In [8]:
# transforms
transforms: ComposeDouble = ComposeDouble(
    [
        Clip(),
        # AlbumentationWrapper(albumentation=A.HorizontalFlip(p=0.5)),
        # AlbumentationWrapper(albumentation=A.RandomScale(p=0.5, scale_limit=0.5)),
        # AlbumentationWrapper(albumentation=A.VerticalFlip(p=0.5)),
        FunctionWrapperDouble(np.moveaxis, source=-1, destination=0),
        FunctionWrapperDouble(normalize_01),
    ]
)

In [9]:
# dataset building
dataset: ObjectDetectionDataSet = ObjectDetectionDataSet(
    inputs=inputs,
    targets=targets,
    transform=transforms,
    use_cache=False,
    convert_to_format=None,
    mapping=mapping,
)

In [10]:
# visualize dataset
color_mapping: Dict[int, str] = {
    1: "red",
}

transform: GeneralizedRCNNTransform = GeneralizedRCNNTransform(
    min_size=1024,
    max_size=1024,
    image_mean=[0.485, 0.456, 0.406],
    image_std=[0.229, 0.224, 0.225],
)

object_detection_viewer_rcnn: ObjectDetectionViewer = ObjectDetectionViewer(
    dataset=dataset, color_mapping=color_mapping, rcnn_transform=transform
)
## press 'n' or 'b' to see images

## Dataset statistics

In [11]:
stats = stats_dataset(dataset)
stats

{'image_height': tensor([2048., 2048., 2048., 2048., 2048., 2048., 2048., 2048., 2048., 2048.,
         2048., 2048., 2048., 2048., 2048., 2048., 2048., 2048., 2048.]),
 'image_width': tensor([2448., 2448., 2448., 2448., 2448., 2448., 2448., 2448., 2448., 2448.,
         2448., 2448., 2448., 2448., 2448., 2448., 2448., 2448., 2448.]),
 'image_mean': tensor([0.5053, 0.4261, 0.4785, 0.4576, 0.4364, 0.4267, 0.4215, 0.4000, 0.4134,
         0.4228, 0.4632, 0.4082, 0.3713, 0.4192, 0.2928, 0.3871, 0.4503, 0.4324,
         0.4388]),
 'image_std': tensor([0.2547, 0.2643, 0.2290, 0.2626, 0.2229, 0.2424, 0.2602, 0.2501, 0.2490,
         0.2731, 0.2392, 0.2527, 0.2255, 0.2532, 0.2257, 0.2382, 0.2566, 0.2610,
         0.2690]),
 'boxes_height': tensor([63, 73, 63,  ..., 51, 48, 88]),
 'boxes_width': tensor([47, 62, 78,  ..., 63, 55, 63]),
 'boxes_num': tensor([ 80.,  51., 119.,  96., 139., 161.,  87., 103., 147.,  58., 127., 101.,
         116., 105.,  86., 129., 137., 148., 136.]),
 'boxes_area':

In [12]:
transform = GeneralizedRCNNTransform(
    min_size=1024,
    max_size=1024,
    image_mean=[0.485, 0.456, 0.406],
    image_std=[0.229, 0.224, 0.225],
)

stats_transform = stats_dataset(dataset, transform)
stats_transform

{'image_height': tensor([864., 864., 864., 864., 864., 864., 864., 864., 864., 864., 864., 864.,
         864., 864., 864., 864., 864., 864., 864.]),
 'image_width': tensor([1024., 1024., 1024., 1024., 1024., 1024., 1024., 1024., 1024., 1024.,
         1024., 1024., 1024., 1024., 1024., 1024., 1024., 1024., 1024.]),
 'image_mean': tensor([ 0.2475, -0.0994,  0.1307,  0.0388, -0.0540, -0.0969, -0.1197, -0.2140,
         -0.1550, -0.1141,  0.0635, -0.1781, -0.3395, -0.1300, -0.6839, -0.2706,
          0.0068, -0.0719, -0.0437]),
 'image_std': tensor([1.1182, 1.1595, 0.9449, 1.1541, 0.9304, 1.0478, 1.1427, 1.0920, 1.0861,
         1.2003, 1.0300, 1.1090, 0.9777, 1.1110, 0.9902, 1.0369, 1.1252, 1.1452,
         1.1789]),
 'boxes_height': tensor([26.3529, 30.5359, 26.3529,  ..., 21.3333, 20.0784, 36.8105]),
 'boxes_width': tensor([19.6445, 25.9141, 32.6016,  ..., 26.3320, 22.9883, 26.3320]),
 'boxes_num': tensor([ 80.,  51., 119.,  96., 139., 161.,  87., 103., 147.,  58., 127., 101.,
       

2024-02-27 17:28:52 - INFO - object_detection_viewer.py:42:get_data: Input sample: 002.tif
Shape: torch.Size([3, 2048, 2448])
2024-02-27 17:28:52 - INFO - object_detection_viewer.py:49:get_data: Transformed input sample: 002.tif
Shape: torch.Size([3, 864, 1024])
2024-02-27 17:28:52 - INFO - object_detection_viewer.py:64:get_target: Target sample: 003.json
{'boxes': tensor([[1422,  187, 1502,  270],
        [ 976,  300, 1095,  395],
        [ 652,  292,  788,  385],
        [ 511,  453,  642,  544],
        [ 599,  534,  685,  644],
        [ 690,  582,  778,  660],
        [ 780,  556,  838,  622],
        [ 780,  725,  931,  883],
        [1145,  717, 1203,  780],
        [1510,  665, 1613,  775],
        [1613,  748, 1774,  886],
        [1789,  851, 1852,  911],
        [1990,  846, 2159, 1017],
        [2108, 1037, 2209, 1163],
        [1890,  682, 1990,  765],
        [2375, 1100, 2438, 1223],
        [1658, 1869, 1797, 2013],
        [1165, 1553, 1231, 1615],
        [1132, 1422,