In [1]:
# System
import os
import time
from IPython.display import clear_output
from tqdm.auto import tqdm

# Standard libs
import numpy as np
import pandas as pd
import random

# Plotting
import matplotlib.pyplot as plt

# Image utils
from PIL import Image
import shutil
import cv2
from glob import glob

# PyTorch 
import torch
import torchvision
import torch.nn.functional as F
import torchvision.datasets
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split, Dataset, Subset
from torchvision.io import read_image
from torchvision.datasets import DatasetFolder
from torchvision.datasets.folder import default_loader

In [2]:
# Scratch check: tests whether the literal 'asd' is a str; the cell displays the result.
isinstance('asd', str)

True

In [51]:
# Root directory of the GTSDB data (local layout; the Kaggle input path is kept
# below as a commented-out alternative).
# DATA_PATH = '/kaggle/input/german-traffic-sign-detection-benchmark-gtsdb'
DATA_PATH = 'DATA/GTSDB/'
# Directory with the training images.
# TRAIN_DATA_PATH = os.path.join(DATA_PATH, 'TestIJCNN2013/TestIJCNN2013Download')
TRAIN_DATA_PATH = os.path.join(DATA_PATH, 'Train')
# NOTE(review): the test path points at a 'TrainIJCNN2013' directory while the
# commented-out train path above points at 'TestIJCNN2013' -- confirm these are
# not swapped.
TEST_DATA_PATH = os.path.join(DATA_PATH, 'TrainIJCNN2013/TrainIJCNN2013')

In [25]:
class TrafficSignsDatasetYOLO(Dataset):
    """Traffic-sign detection dataset that yields YOLO-style normalized boxes.

    The annotation file is read as a headerless, semicolon-separated table
    with the columns ``filename;x1;y1;x2;y2;class`` (one row per bounding box).

    The dataset is indexed by annotation row, not by image: ``len(dataset)``
    is the number of rows, and item ``idx`` returns the image named in row
    ``idx`` together with *all* boxes annotated for that image. An image with
    several boxes is therefore returned once per box.

    Args:
        img_dir: Directory containing the image files.
        annotations_file: Path to the annotation file.
        transform: Optional callable invoked as ``transform(image, target)``
            that returns the transformed ``(image, target)`` pair.
    """

    # Corner-coordinate columns, in the order they are stacked into the box tensor.
    _BOX_COLUMNS = ["x1", "y1", "x2", "y2"]

    def __init__(self, img_dir, annotations_file, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        self.annotations = pd.read_csv(annotations_file, sep=";", header=None,
                                       names=["filename", "x1", "y1", "x2", "y2", "class"])

    def __len__(self):
        """Return the number of annotation rows (not the number of images)."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the image referenced by row ``idx``.

        Returns:
            A tuple ``(image, target)`` where ``image`` is an RGB PIL image
            (or whatever ``transform`` returns) and ``target`` is a dict with:

            * ``"boxes"``: float32 tensor of shape ``(n, 4)`` holding
              ``[x_center, y_center, width, height]``, each divided by the
              image width/height.
            * ``"labels"``: int64 tensor of shape ``(n,)`` with class ids.
            * ``"image_id"``: tensor ``[idx]``.
        """
        filename = self.annotations.iloc[idx, 0]
        image = Image.open(os.path.join(self.img_dir, filename)).convert("RGB")

        # Every annotation row that belongs to the same image file.
        rows = self.annotations[self.annotations.filename == filename]
        corners = torch.as_tensor(rows[self._BOX_COLUMNS].values.tolist(), dtype=torch.float32)
        labels = torch.as_tensor(rows["class"].values.tolist(), dtype=torch.int64)

        # Derive centers and sizes from the untouched corner coordinates.
        # Overwriting the x1/y1 columns with the centers first (as was done
        # previously) made the size computation yield x2 - cx, i.e. only half
        # of the real width/height.
        top_left = corners[:, :2]
        bottom_right = corners[:, 2:]
        centers = (top_left + bottom_right) / 2.0
        sizes = bottom_right - top_left

        # Normalize x-like values by the image width and y-like values by the height.
        scale = torch.tensor([image.width, image.height, image.width, image.height],
                             dtype=torch.float32)
        boxes = torch.cat([centers, sizes], dim=1) / scale

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
        }

        if self.transform:
            image, target = self.transform(image, target)

        return image, target


In [None]:
# Build the YOLO-style dataset from the training images and the ground-truth
# annotation file, then fetch its first sample as a quick smoke test.
dt = TrafficSignsDatasetYOLO(
    img_dir=os.path.join(DATA_PATH, 'Train'),
    annotations_file=os.path.join(DATA_PATH, 'gt_jpg.txt'),
)
loader = iter(dt)
next(loader)

In [57]:
class TrafficSignsDatasetRCNN(Dataset):
    """Traffic-sign detection dataset with torchvision-detection-style targets.

    The annotation file is read as a headerless, semicolon-separated table
    with the columns ``filename;x1;y1;x2;y2;class`` (one row per bounding box).

    The dataset is indexed by annotation row, not by image: ``len(dataset)``
    is the number of rows, and item ``idx`` returns the image named in row
    ``idx`` together with *all* boxes annotated for that image.

    Args:
        img_dir: Directory containing the image files.
        annotations_file: Path to the annotation file.
        transforms: Optional callable invoked as ``transforms(img, target)``
            that returns the transformed ``(img, target)`` pair.
    """

    # Corner-coordinate columns, in the order they are stacked into the box tensor.
    _BOX_COLUMNS = ["x1", "y1", "x2", "y2"]

    def __init__(self, img_dir, annotations_file, transforms=None):
        self.img_dir = img_dir
        self.transforms = transforms
        self.annotations = pd.read_csv(annotations_file, sep=";", header=None,
                                       names=["filename", "x1", "y1", "x2", "y2", "class"])

    def __getitem__(self, idx):
        """Return ``(img, target)`` for the image referenced by row ``idx``.

        Returns:
            A tuple ``(img, target)`` where ``img`` is an RGB PIL image (or
            whatever ``transforms`` returns) and ``target`` is a dict with:

            * ``"boxes"``: float32 tensor ``(n, 4)`` of ``[x1, y1, x2, y2]``
              in pixel coordinates.
            * ``"labels"``: int64 tensor ``(n,)`` with class ids.
            * ``"image_id"``: tensor ``[idx]``.
            * ``"area"``: float32 tensor ``(n,)`` with each box's area.
            * ``"iscrowd"``: int64 zeros ``(n,)`` (no instance is marked as crowd).
        """
        filename = self.annotations.iloc[idx, 0]
        img = Image.open(os.path.join(self.img_dir, filename)).convert("RGB")

        # Every annotation row that belongs to the same image file.
        objects = self.annotations[self.annotations.filename == filename]
        boxes = torch.as_tensor(objects[self._BOX_COLUMNS].values.tolist(), dtype=torch.float32)
        labels = torch.as_tensor(objects["class"].values.tolist(), dtype=torch.int64)

        heights = boxes[:, 3] - boxes[:, 1]
        widths = boxes[:, 2] - boxes[:, 0]

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
            "area": heights * widths,
            # Assume that no instance is a crowd region.
            "iscrowd": torch.zeros((len(objects),), dtype=torch.int64),
        }

        if self.transforms is not None:
            # Unlike the plain torchvision image transforms, these transforms
            # receive and return the target as well, so that boxes stay in sync
            # with the image (e.g. under a random horizontal flip). See
            # references/detection/transforms.py in the pytorch/vision
            # repository for examples.
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Return the number of annotation rows (not the number of images)."""
        # Previously returned len(self.imgs), but no ``imgs`` attribute is ever
        # set, so len() raised AttributeError. __getitem__ indexes annotation
        # rows, so the row count is the matching length.
        return len(self.annotations)

In [58]:
# Build the R-CNN-style dataset from the training images and the ground-truth
# annotation file, and create an iterator over its samples.
dt = TrafficSignsDatasetRCNN(
    img_dir=os.path.join(DATA_PATH, 'Train'),
    annotations_file=os.path.join(DATA_PATH, 'gt_jpg.txt'),
)
loader = iter(dt)

In [92]:
# Fetch the next (image, target) sample from the `loader` iterator.
# NOTE(review): every execution of this cell advances the iterator, so the
# sample displayed depends on how many times the cell has been run.
next(loader)

(<PIL.Image.Image image mode=RGB size=1360x800>,
 {'boxes': tensor([[ 998.,  292., 1074.,  371.],
          [  46.,  350.,  136.,  444.],
          [ 945.,  435., 1006.,  492.]]),
  'labels': tensor([ 1, 23, 23]),
  'image_id': tensor([33]),
  'area': tensor([6004., 8460., 3477.]),
  'iscrowd': tensor([0, 0, 0])})