In [1]:
from torch.utils.data import Dataset
import pandas as pd
import numpy as np
import torch
from torchvision import transforms
import os
from PIL import Image
from model_utils import IMAGE_SIZE, imageTransforms, plot_tensor
from torch.utils.data import DataLoader
from iou_yolo import iou_yolo

class OpenImagesDataset(Dataset):
    def __init__(self, rootDirectory, anchorBoxes, transform=None, dataType='train', gridSize=3):
        # Root directory of the Open-Images Dataset
        self.rootDirectory = rootDirectory
        
        # Defining the gridSize*gridSize applied to every image
        self.gridSize = gridSize
        
        # Directory containing the images
        self.imageDirectory = f"{self.rootDirectory}/{dataType}/data"
        
        # The transformation of the images to apply
        self.transform = transform
        
        # The data set type (train/validation/test)
        self.dataType = dataType
        
        # Image labels (i.e. true bounding boxes)
        self.labels = pd.read_csv(f"{rootDirectory}/{dataType}/labels/detections.csv", index_col=0)
        
        # Computing the width/height of each bounding box (constrained in (0,1))
        self.labels['boxWidth'] = self.labels['XMax'] - self.labels['XMin']
        self.labels['boxHeight'] = self.labels['YMax'] - self.labels['YMin']
        
        # Computing the Center of each bounding box
        self.labels['XCenter'] = (self.labels['XMax'] - self.labels['XMin'])/2
        self.labels['YCenter'] = (self.labels['YMax'] - self.labels['YMin'])/2
        
        # Computing the grid cell each box falls into 
        self.labels['gridCellRow'] = (self.labels['XCenter'] * self.gridSize).astype(int)
        self.labels['gridCellCol'] = (self.labels['YCenter'] * self.gridSize).astype(int)
        
        self.labels['XGridCell'] = self.labels['XCenter'] * self.gridSize - self.labels['gridCellRow']
        self.labels['YGridCell'] = self.labels['YCenter'] * self.gridSize - self.labels['gridCellCol']
        
        # List of imageIDs with no duplicates
        self.imageList = self.labels['ImageID'].drop_duplicates()
        
        # Pre-defined anchor box width/heights
        # [(Width, Heights)...(Width, Heights)]
        with open(anchorBoxes, 'rb') as f:
            self.anchorBoxes = np.load(f)
            
        self.numAnchorBoxes = self.anchorBoxes.shape[0]
            
    def __len__(self):
        return len(self.imageList)
    
    def __getitem__(self, index):
        # Creating the path of the iamge
        img_path = os.path.join(self.imageDirectory, f"{self.imageList.iloc[index]}.jpg")
        
        # Reading in the image
        image = Image.open(img_path)
        
        # Transforming the image according to its datatype
        x = imageTransforms[self.dataType](image)

        return x, 1
    
trainingData = OpenImagesDataset(rootDirectory='open-images-v6', 
                                 anchorBoxes='centroids.npy', 
                                 transform=imageTransforms, 
                                 dataType='validation')    

trainDataLoader = DataLoader(dataset=trainingData, 
                             batch_size=1,
                             num_workers=1)
data, box = next(iter(trainDataLoader))

In [2]:
y = torch.zeros((trainingData.gridSize, trainingData.gridSize, trainingData.numAnchorBoxes*5))

In [3]:
objectDims = trainingData.labels[
    trainingData.labels['ImageID'] == trainingData.imageList.iloc[0]
][['boxWidth','boxHeight','XGridCell','YGridCell', 'gridCellRow', 'gridCellCol']].values

In [19]:
# Computing IOU the box and each anchor box (using width/height) for every grid cell
for objectDim in objectDims:
    print(objectDim)            
    
    # Computing the IOUs for all training boxes
    ious = iou_yolo(objectDim[[0,1]], 
                    trainingData.anchorBoxes)
    
    # Computing which anchor box we should use
    bestAnchorBox = np.argmax(ious)
    
    bestAnchorBoxWidth = trainingData.anchorBoxes[bestAnchorBox,0]
    bestAnchorBoxHeight = trainingData.anchorBoxes[bestAnchorBox,1]
    
    relativeWidth = objectDim[0] / bestAnchorBoxWidth
    
    relativeHeight = objectDim[1] / bestAnchorBoxHeight

    print(trainingData.anchorBoxes)
    
    # Assigning object to its grid cell / Best Anchor Box
    y[
        objectDim[4].astype(int), 
        objectDim[5].astype(int), 
        bestAnchorBox*5:bestAnchorBox*5+5
    ] = torch.tensor([1, objectDim[0], objectDim[1], relativeWidth, relativeHeight])


    
    
    






[0.37315632 0.53982297 0.55973448 0.80973446 0.         0.        ]
[[0.19638349 0.26667985]
 [0.08624264 0.12069918]
 [0.83730476 0.88997494]
 [0.41217437 0.35264859]
 [0.28597913 0.56793756]
 [0.691825   0.54561481]
 [0.48818813 0.76813516]]
4
