In [1]:
import torch
import sys 
sys.path.append('../vision')
import torchvision
from torch.utils.data import DataLoader

In [2]:
class VOCDataloader:
    def __init__(self, year):
        
        BATCH_SIZE = 64
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.Resize((224,224)),
            torchvision.transforms.ToTensor()
        ])
        datasets = {}
        
        for image_set in ['train', 'trainval', 'val']:
            datasets[image_set] = torchvision.datasets.VOCDetection(
                './dataloaders/datasets/voc',
                year=year,
                image_set=image_set,
                download=True,
                transform=transforms
            )

        self.val_dataset = datasets['val']
        self.train = DataLoader(datasets['train'], batch_size=BATCH_SIZE, num_workers=4, shuffle=True)
        self.val = DataLoader(datasets['trainval'], batch_size=BATCH_SIZE, num_workers=4, shuffle=True)
        self.test = DataLoader(datasets['val'], batch_size=BATCH_SIZE, num_workers=4, shuffle=True)

In [3]:
voc_dataloader = VOCDataloader('2007') 

Using downloaded and verified file: ./dataloaders/datasets/voc/VOCtrainval_06-Nov-2007.tar
Using downloaded and verified file: ./dataloaders/datasets/voc/VOCtrainval_06-Nov-2007.tar
Using downloaded and verified file: ./dataloaders/datasets/voc/VOCtrainval_06-Nov-2007.tar


In [4]:
voc_dataloader.val_dataset[5][1]['annotation']

{'folder': 'VOC2007',
 'filename': '000020.jpg',
 'source': {'database': 'The VOC2007 Database',
  'annotation': 'PASCAL VOC2007',
  'image': 'flickr',
  'flickrid': '194986987'},
 'owner': {'flickrid': 'KhE ?', 'name': 'Khedara Ariyaratne'},
 'size': {'width': '375', 'height': '500', 'depth': '3'},
 'segmented': '0',
 'object': [{'name': 'car',
   'pose': 'Unspecified',
   'truncated': '0',
   'difficult': '0',
   'bndbox': {'xmin': '33', 'ymin': '148', 'xmax': '371', 'ymax': '416'}}]}

In [6]:
for batch in voc_dataloader.test:
    print(len(batch[0]))

[tensor([[[[0.7176, 0.7882, 0.7451,  ..., 0.5882, 0.3882, 0.6431],
          [0.2000, 0.3490, 0.2824,  ..., 0.5020, 0.4745, 0.6980],
          [0.2706, 0.3765, 0.5294,  ..., 0.6667, 0.6118, 0.6824],
          ...,
          [0.7020, 0.7098, 0.7020,  ..., 0.6667, 0.6549, 0.5412],
          [0.6863, 0.6863, 0.6980,  ..., 0.3882, 0.4667, 0.5647],
          [0.6588, 0.6706, 0.6824,  ..., 0.2118, 0.2627, 0.2941]],

         [[0.6980, 0.7804, 0.7451,  ..., 0.6196, 0.3686, 0.6588],
          [0.2039, 0.3608, 0.3020,  ..., 0.6157, 0.5725, 0.8039],
          [0.3098, 0.4196, 0.5804,  ..., 0.8118, 0.7608, 0.8196],
          ...,
          [0.6431, 0.6471, 0.6353,  ..., 0.6157, 0.6235, 0.5059],
          [0.6275, 0.6235, 0.6314,  ..., 0.3020, 0.4039, 0.5098],
          [0.6000, 0.6118, 0.6235,  ..., 0.1412, 0.1882, 0.2275]],

         [[0.6863, 0.7725, 0.7412,  ..., 0.6471, 0.4039, 0.6824],
          [0.2039, 0.3647, 0.3137,  ..., 0.6275, 0.5922, 0.8275],
          [0.3255, 0.4392, 0.6000,  ..., 