In [None]:
# import dependencies
import json
from pathlib import Path

import cv2
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [None]:
# remove downloads
!rm -rf /content/YOLO
!rm -rf /content/COCO
!rm -rf /content/YOLO.zip
!rm -rf /content/COCO.zip
!rm -rf /content/sample_data

Download YOLO and COCO zip files from shared Google Drive via their links.

**YOLO dataset:** Split into *train* and *valid* folders, each containing the subfolders *images* and *labels*. Every hotdog image in *images* is a .jpg file paired with one .txt file of the same name in *labels*.

**COCO dataset:** Split into *train* and *valid* folders. Each folder is populated with .jpg files of hotdog images and a single .json file containing annotations for all images within the folder.

In [None]:
!pip install gdown

# YOLO drive link
yolo_id = "1_w6_UtCZfdx2JYbrzVNyUn10dzdKzV4y"
!gdown --id {yolo_id} -O YOLO.zip
# unzip YOLO
!unzip -q YOLO.zip -d /content/YOLO

# COCO drive link
coco_id = "1dTfSNQ9Qk_T4BOuS1l0cdDQS5xtXrVJ4"
!gdown --id {coco_id} -O COCO.zip
# unzip COCO
!unzip -q COCO.zip -d /content/COCO

# check downloads
print('\n\n' + '-'*20)
print('Checking YOLO downloads, expecting "train" and "valid" folders:')
!ls "/content/YOLO/Hot Dog Detection YOLO"
print('Checking COCO downloads, expecting "train" and "valid" folders:')
!ls "/content/COCO/Hot Dog Detection COCO"

Downloading...
From (original): https://drive.google.com/uc?id=1_w6_UtCZfdx2JYbrzVNyUn10dzdKzV4y
From (redirected): https://drive.google.com/uc?id=1_w6_UtCZfdx2JYbrzVNyUn10dzdKzV4y&confirm=t&uuid=aec25eb4-51e9-4d1f-bce6-441dec41b583
To: /content/YOLO.zip
100% 44.2M/44.2M [00:00<00:00, 75.9MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1dTfSNQ9Qk_T4BOuS1l0cdDQS5xtXrVJ4
From (redirected): https://drive.google.com/uc?id=1dTfSNQ9Qk_T4BOuS1l0cdDQS5xtXrVJ4&confirm=t&uuid=10935cc4-67f5-452d-91e0-a69eb67b0b6e
To: /content/COCO.zip
100% 43.4M/43.4M [00:00<00:00, 49.0MB/s]


--------------------
Checking YOLO downloads, expecting "train" and "valid" folders:
train  valid
Checking COCO downloads, expecting "train" and "valid" folders:
train  valid


Analyze the JSON structure used in the COCO annotations. To combine COCO with the YOLO dataset (and for our hotdog detection use case) we need both the bounding box labels and the object class. Note: These labels will need further processing to match YOLO's format.

In [None]:
import json

# parse the training-split COCO annotation file so its layout can be inspected
coco_json = Path(r"/content/COCO/Hot Dog Detection COCO/train/_annotations.coco.json")
data = json.loads(coco_json.read_text())
# for the complete content instead of just the layout: print(json.dumps(data, indent=4))

def print_json_structure(d, indent=0):
    """Print the key hierarchy of a parsed JSON value.

    Dict keys are printed one per line. A list is shown as "[list]" and only
    its first element (if any) is descended into. Any other value prints
    nothing. Each nesting level adds 2 to `indent`, and every line is
    prefixed with two spaces per indent unit.
    """
    pad = "  " * indent

    if isinstance(d, list):
        print(pad + "[list]")
        if d:
            # the first entry stands in for the structure of all entries
            print_json_structure(d[0], indent + 2)
        return

    if not isinstance(d, dict):
        return

    for name in d:
        print(pad + str(name))
        print_json_structure(d[name], indent + 2)

# show the key hierarchy of the loaded annotations (lists are represented by their first element)
print_json_structure(data)

images
    [list]
        id
        license
        file_name
        height
        width
        date_captured
annotations
    [list]
        id
        image_id
        category_id
        bbox
            [list]
        area
        segmentation
            [list]
        iscrowd


Create a **custom PyTorch Dataset** that loads and merges the YOLO and COCO datasets. This is done by enforcing the same image and label formats across both datasets; I chose to convert the COCO data to YOLO formatting.

Note: For memory purposes, the dataset stores only references (image file paths, YOLO label file paths, and COCO image ids that index into the parsed annotation JSON) rather than loading thousands of ndarrays into RAM at once. The actual pixel values and labels are loaded per sample when a batch is requested. This keeps the dataset lightweight and therefore scalable.

In [None]:
class HotDogDataset(Dataset):
    '''Detection dataset that merges a YOLO-format export and a COCO-format export.

    Only references are kept in memory: image paths, plus either the path of a
    YOLO label file or the COCO image id. Images and labels are read in
    __getitem__. All boxes are returned in YOLO convention:
    (x_center, y_center, width, height), normalized by image size.
    '''

    def __init__(self, yolo_src_path, coco_src_path, subset='train', transform=None):
        '''
        Args:
        yolo_src_path (Path or str): Path to source YOLO data folder.
        coco_src_path (Path or str): Path to source COCO data folder.
        subset (str): Set to be loaded. Either 'train' or 'valid'.
        transform (callable, optional): Called as transform(img, bboxes, classes);
            must return a mapping with the keys 'image', 'bboxes' and 'classes'.
            When omitted, images are resized to 224x224, converted to a tensor
            and normalized with ImageNet statistics.
        '''
        # Path(...) lets callers pass plain strings as well as Path objects
        self.yolo_root = Path(yolo_src_path) / subset
        self.coco_root = Path(coco_src_path) / subset
        self.transform = transform
        # only store file names in memory, process them on the fly <- __getitem__
        self.image_files = []
        self.label_files = []
        self.fetch_data_paths()


    def fetch_data_paths(self):
        '''Fill image_files / label_files with the YOLO samples followed by the COCO samples.

        YOLO entries store the Path of their .txt label file; COCO entries store
        the integer image id, which _coco_label later resolves through
        self.id_to_annotation. Images without a label file (YOLO) or without a
        file on disk (COCO) are skipped.
        '''
        # fetch YOLO files
        img_dir = self.yolo_root / 'images'
        label_dir = self.yolo_root / 'labels'
        # sorted: glob order depends on the filesystem, sorting keeps indices reproducible
        for img_path in sorted(img_dir.glob('*.jpg')):
            associated_label = label_dir / (img_path.stem + '.txt')
            if associated_label.exists():
                self.image_files.append(img_path)
                self.label_files.append(associated_label)

        # fetch COCO files
        annotations = self.coco_root / '_annotations.coco.json'
        with open(annotations, 'r') as f:
            coco_data = json.load(f)

        # goal is to get id from image then map id onto annotations key to get associated labels (eg. bbox)
        # first, create map + grouping annotations from the same image
        self.id_to_annotation = {}
        for annotation in coco_data['annotations']:
            self.id_to_annotation.setdefault(annotation['image_id'], []).append(annotation)
        # match id to filename <- filename is value in image dict
        self.img_metadata = {img['id']: img for img in coco_data['images']}

        for img_id, metadata in self.img_metadata.items():
            img_path = self.coco_root / metadata['file_name']
            if img_path.exists():
                self.image_files.append(img_path)
                self.label_files.append(img_id) # later used to map onto annotations using self.id_to_annotation dict


    def _yolo_label(self, label_file):
        '''Takes yolo formatted label file (.txt) and extracts usable label.

        Returns:
        (bboxes, classes): float32 array of shape (n, 4) and int32 array of shape (n,).
            A file with no annotation lines yields shapes (0, 4) and (0,).
        '''
        # YOLO labels are in format "class_id x_center y_center box_width box_height"
        # the values are normalized between 0 and 1, proportional to image size.
        with open(label_file, 'r') as f:
            # blank lines would make the rows ragged, so they are skipped
            rows = [line.split() for line in f if line.strip()]

        if not rows:
            # image without objects: keep the 2-D box layout so downstream code can rely on it
            return np.zeros((0, 4), dtype=np.float32), np.zeros((0,), dtype=np.int32)

        labels = np.array(rows, dtype=np.float32)
        return labels[:,1:], labels[:,0].astype(np.int32) # all bboxes, all classes


    def _coco_label(self, img_id):
        '''Takes id of coco image and fetches the associated bbox and class label from
        annotation dict. Converting to YOLO format.

        Returns:
        (bboxes, classes): float32 array of shape (n, 4) and int32 array of shape (n,).
            An image without annotations yields shapes (0, 4) and (0,).
        '''
        bboxes, classes = [], []
        img_width, img_height = self.img_metadata[img_id]['width'], self.img_metadata[img_id]['height'] # for normalization

        for annotation in self.id_to_annotation.get(img_id, []):
            x_top_left, y_top_left, w, h = annotation['bbox']
            # center x and y and normalize all, 0 to 1
            norm_cx = (x_top_left + w/2) / img_width
            norm_cy = (y_top_left + h/2) / img_height
            norm_w = w / img_width
            norm_h = h / img_height

            bboxes.append([norm_cx, norm_cy, norm_w, norm_h])
            # NOTE(review): category_id is passed through unchanged; confirm that the COCO
            # category ids line up with the class ids used in the YOLO label files.
            classes.append(annotation['category_id'])

        # reshape keeps the (n, 4) layout even when there are no annotations
        return np.array(bboxes, dtype=np.float32).reshape(-1, 4), np.array(classes, dtype=np.int32)


    def __getitem__(self, idx):
        '''Load one sample.

        Returns:
        dict with 'image' (the processed image), 'targets' (dict holding the
        'bboxes' float32 tensor and the 'classes' int64 tensor) and 'image_path'.

        Raises:
        OSError: if the image file cannot be decoded.
        ValueError: if the stored label reference is neither a Path nor an int.
        '''
        img_path = self.image_files[idx]
        img = cv2.imread(str(img_path)) # cv2 loads image in BGR
        if img is None:
            # imread signals failure by returning None instead of raising
            raise OSError(f'cv2.imread could not read image: {img_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # convert to RGB

        labels = self.label_files[idx]

        # YOLO case, label (Path): path to .txt file
        if isinstance(labels, Path):
            bboxes, classes = self._yolo_label(labels)
        # COCO case, label (int): image_id
        elif isinstance(labels, int):
            bboxes, classes = self._coco_label(labels)
        else:
            raise ValueError(f'Unsupported label reference {labels!r} for image {img_path}')

        if self.transform:
            transformed = self.transform(img, bboxes, classes)
            img, bboxes, classes = transformed['image'], transformed['bboxes'], transformed['classes']
        else:
            # boxes are normalized, so resizing the image does not require touching them
            img = cv2.resize(img, (224,224))
            img = transforms.ToTensor()(img) # handles 0 to 1 normalization and permute -> (C,H,W)
            # using ImageNet's normalization weights since dealing with natural images
            img = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(img)

        bboxes_tensor = torch.as_tensor(bboxes, dtype=torch.float32)
        if bboxes_tensor.numel() == 0:
            # a transform may hand back an empty list; keep the (0, 4) layout
            bboxes_tensor = bboxes_tensor.reshape(0, 4)
        classes_tensor = torch.as_tensor(classes, dtype=torch.long)

        return {'image': img,
                'targets': {'bboxes': bboxes_tensor, 'classes': classes_tensor},
                'image_path': img_path
        }


    def __len__(self):
        '''Number of samples (YOLO + COCO).'''
        return len(self.image_files)


    # needed since pytorch cannot 'auto-batch' labels with varying number of bounding boxes, labels have
    # different tensor sizes -> so instead of torch.stack that pytorch would do automatically, we leave labels as a list of tensors
    # images must be stacked though
    def custom_collate_fn(self, batch):
        '''Collate samples from __getitem__ into one batch dict.

        Images are stacked into a single tensor; targets and image paths stay
        as lists with one entry per sample.
        '''
        batch_images, batch_targets, batch_image_paths = [], [], []
        for sample in batch:
            batch_images.append(sample['image'])
            batch_targets.append(sample['targets'])
            batch_image_paths.append(sample['image_path'])

        batch_images = torch.stack(batch_images)
        return {'batch_images': batch_images,
                'batch_targets': batch_targets,
                'batch_image_paths': batch_image_paths
        }

**Initialize the datasets and dataloaders** using the custom PyTorch dataset created above. Sticking to just the train and validation sets for now, since that is how the downloaded data is split (the split can always be changed later).

In [None]:
yolo_src_path = Path("/content/YOLO/Hot Dog Detection YOLO")
coco_src_path = Path("/content/COCO/Hot Dog Detection COCO")

# datasets: one per split, each merging the YOLO and COCO sources
train_dataset, valid_dataset = (
    HotDogDataset(yolo_src_path, coco_src_path, subset=split)
    for split in ('train', 'valid')
)

# dataloaders: only the training loader shuffles; both use the dataset's own collate function
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=train_dataset.custom_collate_fn,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=valid_dataset.custom_collate_fn,
)

Check the batch output (shape). Batched images should be a tensor of shape (batch_size, C, H, W). The number of targets and the number of image paths should both equal the batch_size the loader was initialized with. Each bboxes target should have shape (num_annotated_boxes, 4) and each classes target should have shape (num_annotated_boxes,).

In [None]:
# batch output <- what is returned after collate_fn
# Iterate the loader once and stop after two batches. Calling next(iter(train_loader))
# inside the loop would build a fresh iterator on every pass (and would show the same
# batch twice on a loader that does not shuffle).
for i, batch in zip(range(2), train_loader):
    first_target = batch['batch_targets'][0]
    print(f'Example Batch {i+1}\n' + '-'*20)
    print(f"Batched images shape: {batch['batch_images'].shape}")
    print(f"Amount of targets: {len(batch['batch_targets'])}")
    print(f"   First bboxes annotation (in batch) shape: {first_target['bboxes'].shape}")
    print(f"   First classes annotation (in batch) shape: {first_target['classes'].shape}")
    print(f"Number of batch's image paths: {len(batch['batch_image_paths'])}\n") # should give batch_size <- sanity check


Example Batch 1
--------------------
Batched images shape: torch.Size([4, 3, 224, 224])
Amount of targets: 4
   First bboxes annotation (in batch) shape: torch.Size([3, 4])
   First classes annotation (in batch) shape: torch.Size([3])
Number of batch's image paths: 4

Example Batch 2
--------------------
Batched images shape: torch.Size([4, 3, 224, 224])
Amount of targets: 4
   First bboxes annotation (in batch) shape: torch.Size([1, 4])
   First classes annotation (in batch) shape: torch.Size([1])
Number of batch's image paths: 4

