In [11]:
import os
import cv2
import xml.etree.ElementTree as ET
from torchvision.transforms import transforms
from torchvision.datasets import VOCDetection
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

In [12]:
class CustomVOCDetection(VOCDetection):
    def __init__(self, root, transform=None, target_transform=None):
        self.root = root
        self.transform = transform
        self.target_transform = target_transform
        self.image_folder = os.path.join(self.root, 'PNGImages')
        self.annotation_folder = os.path.join(self.root, 'Annotations')
        self.image_files = os.listdir(self.image_folder)
        self.annotation_files = os.listdir(self.annotation_folder)

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, index):
        img_filename = self.image_files[index]
        img_path = os.path.join(self.image_folder, img_filename)
        img = cv2.imread(img_path)  # Load the image using OpenCV
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB

        annotation_filename = self.annotation_files[index]
        annotation_file = os.path.join(self.annotation_folder, annotation_filename)
        target = self.parse_voc_xml(ET.parse(annotation_file).getroot())

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def parse_voc_xml(self, node):
        # Initialize variables to store bounding box information
        boxes = []

        for obj in node.findall('object'):
            name = obj.find('name').text
            bndbox = obj.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)

            # Append bounding box information to the list
            boxes.append([xmin, ymin, xmax, ymax])

        # Return the list of bounding boxes
        return {'boxes': boxes}

In [13]:
folder_path = r"C:\Users\xavim\Desktop\License_plates\Imagenes"

In [14]:
def get_dataloader(dataset_path):
    voc_dataset = VOCDetection(root=dataset_path, image_set='train', download=False)

    transform = transforms.Compose([transforms.Resize((224, 224)),
                                    transforms.ToTensor()])

    dataloader = DataLoader(voc_dataset, batch_size=32, shuffle=True, transform = transform)

    return dataloader

In [15]:
get_dataloader(folder_path)

RuntimeError: Dataset not found or corrupted. You can use download=True to download it