This notebook is used to preprocess the CocoDoom dataset to allow for faster training.

Each image will be loaded, preprocessed, and saved as a tensor shard in the same location as the original images.

In [17]:
# Add project directory to path for imports
import sys
import os
sys.path.append(os.path.join(os.pardir))

from PIL import Image
from Vision.datasets import CocoDoomDataset
from transformers import DetrImageProcessor

import torch

In [18]:
# create preprocessor
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

# create dataset instance
dataset = CocoDoomDataset(
    data_dir=os.path.join(os.pardir, os.pardir, "datasets", "cocodoom"),
    annotation_file_name="run-train.json",
    processor=processor
)

loading annotations into memory...
Done (t=0.95s)
creating index...
index created!
loading annotations into memory...
Done (t=0.93s)
creating index...
index created!
Loaded run-train.json
Number of images: 50732
Number of Categories: 94


In [None]:
# full preprocessing was moved to a python script in Vision
from tqdm import tqdm

split_name = "train"
saved, skipped = 0, 0
save_root = os.path.join(
    os.pardir, os.pardir, "datasets", "cocodoom", "preprocessed"
)
os.makedirs(save_root, exist_ok=True)

for i in tqdm(range(1000), desc=f"Preprocessing {split_name}"):
    image, target, img_file_name = dataset.get_image(i)

    encoding = processor(
        images=image,
        annotations=target,
        return_tensors="pt"
    )

    pixel_values = encoding['pixel_values'].squeeze()
    target = dict(encoding['labels'][0])

    # reduce format of target tensors
    target['boxes'] = target['boxes'].to(torch.float16)
    del target['size']
    del target['orig_size']
    # we only have 94 categories
    target['class_labels'] = target['class_labels'].to(torch.int16)
    del target['area']  # remove area to save space
    del target['iscrowd'] # remove iscrowd to save space

    # modify file name to have .pt extension
    pt_file_name = os.path.splitext(img_file_name)[0] + ".pt"
    save_path = os.path.join(save_root, pt_file_name)

    if not os.path.exists(os.path.dirname(save_path)):
        os.makedirs(os.path.dirname(save_path), exist_ok=True)

    if os.path.exists(save_path):
        skipped += 1
        continue

    torch.save(
        {
            "pixel_values": pixel_values,
            "labels": target
        },
        save_path
    )
    saved += 1

print(f"{split_name}: saved {saved}, skipped {skipped}")

Preprocessing train: 100%|██████████| 1000/1000 [00:16<00:00, 60.82it/s]

train: saved 1, skipped 0





In [20]:
pixel_values, labels = dataset[0]
print(f"Loaded pixel values shape: {pixel_values.shape}")
print(f"Loaded labels: {labels}")

print(f"{labels['class_labels'].dtype}")

Loaded pixel values shape: torch.Size([3, 800, 1280])
Loaded labels: {'size': tensor([ 800, 1280]), 'image_id': tensor([1010000002]), 'class_labels': tensor([0, 0]), 'boxes': tensor([[0.4328, 0.6225, 0.0531, 0.1250],
        [0.5484, 0.5700, 0.0469, 0.1000]]), 'area': tensor([2880., 1840.]), 'iscrowd': tensor([0, 0]), 'orig_size': tensor([200, 320])}
torch.int64


In [21]:
# benchmarking __getitem__ when including preprocessing
import time

start_time = time.perf_counter()
for i in range(1000):
    pixel_values, labels = dataset.slow__getitem__(i)
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Time taken to fetch 1000 items: {elapsed_time:.2f} seconds")

AttributeError: 'CocoDoomDataset' object has no attribute 'slow__getitem__'

In [None]:
# benchmarking __getitem__ without preprocessing
import time

start_time = time.perf_counter()
for i in range(1000):
    pixel_values, labels = dataset[i]
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Time taken to fetch 1000 items: {elapsed_time:.2f} seconds")

Time taken to fetch 1000 items: 0.41 seconds
