This notebook is used to preprocess the CocoDoom dataset to allow for faster training.

Each image will be loaded, preprocessed, and saved as a tensor shard in the same location as the original images.

In [1]:
# Add project directory to path for imports
import sys
import os
sys.path.append(os.path.join(os.pardir))

from PIL import Image
from Vision.datasets import CocoDoomDataset
from transformers import DetrImageProcessor

import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# create preprocessor
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

# create dataset instance
dataset = CocoDoomDataset(
    data_dir=os.path.join(os.pardir, os.pardir, "datasets", "cocodoom"),
    annotation_file_name="run-train.json",
    processor=processor
)

loading annotations into memory...
Done (t=0.94s)
creating index...
index created!
loading annotations into memory...
Done (t=0.95s)
creating index...
index created!
Loaded run-train.json
Number of images: 50732
Number of Categories: 94


In [6]:
for i in range(1000):
    # fetch the image
    image, target, img_file_name = dataset.get_image(i)
    # ann = dataset.get_annotation(i)

    print(f"Image {i} - file name: {img_file_name}")
    print(f"Target (before): {target}")

    # preprocess the image
    encoding = processor(
        images=image,
        annotations=target,
        return_tensors="pt"
    )

    pixel_values = encoding['pixel_values'].squeeze()
    target = encoding['labels'][0]
    
    print(f"Pixel values shape: {pixel_values.shape}")
    print(f"Target: {target}")

    # modify file name to have .pt extension
    pt_file_name = os.path.splitext(img_file_name)[0] + ".pt"

    print(f"Image {i} processed with shape: {encoding['pixel_values'].shape}")
    save_path = os.path.join(
        os.pardir, os.pardir,
        "datasets", "cocodoom", "preprocessed", pt_file_name)
    print(f"Saving to: {save_path}")

    if not os.path.exists(os.path.dirname(save_path)):
        os.makedirs(os.path.dirname(save_path))

    # save the processed data
    torch.save(
        {
            "pixel_values": pixel_values,
            "labels": target
        },
        save_path
    )

Image 0 - file name: run1/map01/rgb/000002.png
Target (before): {'image_id': 1010000002, 'annotations': [{'id': 1010000002000004, 'image_id': 1010000002, 'category_id': 1, 'segmentation': [[131, 117, 136, 119, 137, 116, 140, 116, 141, 112, 143, 112, 144, 116, 147, 117, 146, 130, 144, 131, 145, 136, 142, 136, 142, 134, 139, 135, 138, 122, 136, 122, 133, 119, 131, 119, 131, 117]], 'area': 180, 'bbox': [130, 112, 17, 25], 'iscrowd': 0}, {'id': 1010000002000005, 'image_id': 1010000002, 'category_id': 1, 'segmentation': [[169, 108, 173, 110, 173, 108, 176, 107, 177, 104, 179, 104, 179, 107, 182, 108, 183, 112, 181, 113, 182, 116, 180, 118, 180, 123, 176, 122, 176, 120, 175, 120, 175, 112, 173, 112, 169, 108]], 'area': 115, 'bbox': [168, 104, 15, 20], 'iscrowd': 0}]}
Pixel values shape: torch.Size([3, 800, 1280])
Target: {'size': tensor([ 800, 1280]), 'image_id': tensor([1010000002]), 'class_labels': tensor([1, 1]), 'boxes': tensor([[0.4328, 0.6225, 0.0531, 0.1250],
        [0.5484, 0.5700, 

In [4]:
dataset.get_preprocessed_item(0)

Loading preprocessed data from: ../../datasets/cocodoom/preprocessed/run1/map01/rgb/000002.pt


(tensor([[[-1.9809, -1.9809, -1.9638,  ..., -1.9809, -1.9809, -1.9809],
          [-1.9809, -1.9809, -1.9638,  ..., -1.9809, -1.9809, -1.9809],
          [-1.9809, -1.9809, -1.9638,  ..., -1.9809, -1.9809, -1.9809],
          ...,
          [-1.6042, -1.6042, -1.6042,  ..., -1.3644, -1.3644, -1.3644],
          [-1.6384, -1.6384, -1.6384,  ..., -1.3644, -1.3644, -1.3644],
          [-1.6384, -1.6384, -1.6384,  ..., -1.3644, -1.3644, -1.3644]],
 
         [[-1.8957, -1.8957, -1.8782,  ..., -1.8957, -1.8957, -1.8957],
          [-1.8957, -1.8957, -1.8782,  ..., -1.8957, -1.8957, -1.8957],
          [-1.8957, -1.8957, -1.8782,  ..., -1.8957, -1.8957, -1.8957],
          ...,
          [-1.5280, -1.5280, -1.5280,  ..., -1.4055, -1.4055, -1.4055],
          [-1.5455, -1.5455, -1.5455,  ..., -1.4055, -1.4055, -1.4055],
          [-1.5455, -1.5455, -1.5455,  ..., -1.4055, -1.4055, -1.4055]],
 
         [[-1.6650, -1.6650, -1.6476,  ..., -1.6650, -1.6650, -1.6650],
          [-1.6650, -1.6650,

In [5]:
# benchmarking __getitem__ when including preprocessing
import time

start_time = time.perf_counter()
for i in range(1000):
    pixel_values, labels = dataset[i]
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Time taken to fetch 1000 items: {elapsed_time:.2f} seconds")

Time taken to fetch 1000 items: 13.96 seconds


In [7]:
# benchmarking __getitem__ without preprocessing
import time

start_time = time.perf_counter()
for i in range(1000):
    pixel_values, labels = dataset.get_preprocessed_item(i)
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Time taken to fetch 1000 items: {elapsed_time:.2f} seconds")

Loading preprocessed data from: ../../datasets/cocodoom/preprocessed/run1/map01/rgb/000002.pt
Loading preprocessed data from: ../../datasets/cocodoom/preprocessed/run1/map01/rgb/000007.pt
Loading preprocessed data from: ../../datasets/cocodoom/preprocessed/run1/map01/rgb/000012.pt
Loading preprocessed data from: ../../datasets/cocodoom/preprocessed/run1/map01/rgb/000017.pt
Loading preprocessed data from: ../../datasets/cocodoom/preprocessed/run1/map01/rgb/000022.pt
Loading preprocessed data from: ../../datasets/cocodoom/preprocessed/run1/map01/rgb/000027.pt
Loading preprocessed data from: ../../datasets/cocodoom/preprocessed/run1/map01/rgb/000032.pt
Loading preprocessed data from: ../../datasets/cocodoom/preprocessed/run1/map01/rgb/000037.pt
Loading preprocessed data from: ../../datasets/cocodoom/preprocessed/run1/map01/rgb/000042.pt
Loading preprocessed data from: ../../datasets/cocodoom/preprocessed/run1/map01/rgb/000047.pt
Loading preprocessed data from: ../../datasets/cocodoom/prep