In [1]:
import numpy as np
from tqdm import tqdm
from skimage import io, transform
import matplotlib.pyplot as plt
import time
import glob
import xmltodict
import math
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

In [2]:
# Locations of the Pascal VOC 2012 images / annotations and of the trained-model directory.
data_images_path     = '../VOCdevkit/VOC2012/JPEGImages'
data_annotation_path = '../VOCdevkit/VOC2012/Annotations'
trained_model_path = './trained_model/'
# Candidate input resolutions, stepping by subsampled_ratio from 320 up to 608.
# The list previously contained 570, the only entry not divisible by
# subsampled_ratio (32); 576 restores the uniform step of 32.
# NOTE(review): presumably consumed by multi-scale training -- confirm against the usage downstream.
image_sizes = [320,352,384,416,448,480,512,544,576,608]
image_depth  = 3  # presumably the number of image channels (RGB) -- confirm
detection_conv_size = 3
subsampled_ratio = 32

### General info on dataset

In [10]:
# Gather the image and annotation paths. Both lists are sorted so that entries
# at the same index are expected to belong together (the original author noted
# a length of 17125 for each list).
# Note: without recursive=True, glob treats '**' like '*', so only the direct
# children of each directory are matched.
list_images = sorted(glob.glob(data_images_path + '/**'))
list_annotations = sorted(glob.glob(data_annotation_path + '/**'))
total_images = len(list_images)

In [11]:
def get_classes(xml_files=list_annotations):
    '''
    Collect the distinct object class names found in Pascal VOC annotation files.

    Input : xml_files - iterable of paths to annotation XML files
                        (defaults to the module-level list_annotations).
    Output: Alphabetically sorted list of the unique, lowercased class names.

    Raises: KeyError if an annotation has no 'annotation' -> 'object' -> 'name' entry.
    '''
    classes = set()

    for file in xml_files:

        # The context manager closes the file even when parsing raises,
        # so a malformed annotation no longer leaks the handle.
        with open(file) as f:
            doc = xmltodict.parse(f.read()) #parse the xml file to python dict.

        # Pascal VOC format: an image with a single object yields one mapping at
        # 'annotation' -> 'object', while an image with several objects yields a
        # list of mappings. Normalise both shapes to a list instead of relying on
        # a TypeError raised while iterating, which could hide unrelated errors.
        objects = doc['annotation']['object']
        if not isinstance(objects, list):
            objects = [objects]

        for obj in objects:
            classes.add(obj['name'].lower()) #store the lowercased string.

    #a set already removes duplicates; sorted() returns the names as an ordered list.
    return sorted(classes)

In [12]:
# Names listed here are meant to be left out of training (empty = keep every class).
excluded_classes = []
# Distinct class names found in the annotation files, and how many there are.
classes = get_classes()
num_of_class = len(classes)

### K-Means

In [13]:
class K_Means:
    '''
    K-means clustering of bounding-box dimensions using (1 - IoU) as the distance.

    The boxes are reduced to (width, height) pairs, the first k of them seed the
    centroids, and calling the instance iterates assignment / mean-update steps
    until the assignments stop changing.
    '''

    def __init__(self, k, boxes):
        '''
        Input : k     - number of clusters to fit.
                boxes - 2-D numpy array with one box per row. Columns 2 and 3 are
                        differenced against columns 0 and 1 to obtain width and
                        height, so the layout is presumably
                        (x_min, y_min, x_max, y_max, ...) -- confirm against the caller.

        Raises: IndexError if k exceeds the number of boxes, since the first k
                boxes are used as the initial centroids.
        '''
        self.k = k
        self.boxes = boxes
        self.rows = self.boxes.shape[0]
        self.distances = np.empty((self.rows, self.k))
        # -1 is never a valid cluster index, so the first assignment can never be
        # mistaken for "unchanged". Starting from zeros made the loop stop before
        # any update whenever every box initially fell into cluster 0 (always the
        # case for k == 1), returning the seed boxes instead of cluster means.
        self.last_centroids = np.full((self.rows,), -1)

        self.boxes = self.process_boxes(self.boxes)
        # Seed the centroids with the first k processed boxes (deterministic start).
        self.centroids = np.asarray(
            [self.boxes[i, :] for i in range(self.k)], dtype=np.float32
        )

    def process_boxes(self, boxes):
        '''
        Convert boxes into size form.

        Returns a new array in which the first two columns of the input are
        dropped and the next two hold |col2 - col0| and |col3 - col1| (width and
        height); any further columns are carried over unchanged. The input array
        is not modified.
        '''
        sizes = boxes[:, 2:].copy()
        sizes[:, 0] = np.abs(boxes[:, 2] - boxes[:, 0])
        sizes[:, 1] = np.abs(boxes[:, 3] - boxes[:, 1])
        return sizes

    def iou(self, box, centroids):
        '''
        IoU between one (width, height) box and every centroid, computed as if
        all rectangles shared the same corner (only the sizes are compared).

        Input : box       - sequence whose first two entries are width and height.
                centroids - 2-D array whose first two columns are width and height.
        Output: 1-D array with one IoU value per centroid.

        Raises: ValueError if any overlap width or height is exactly zero, i.e.
                the box or one of the centroids has a zero dimension.
        '''
        overlap_w = np.minimum(centroids[:, 0], box[0])
        overlap_h = np.minimum(centroids[:, 1], box[1])

        if (overlap_w == 0).any() or (overlap_h == 0).any():
            raise ValueError("The given box has no area!")

        intersection_area = overlap_w * overlap_h
        box_area = box[0] * box[1]
        centroid_area = centroids[:, 0] * centroids[:, 1]

        return intersection_area / (box_area + centroid_area - intersection_area)

    def __call__(self):
        '''
        Run the clustering until the box-to-centroid assignment stops changing.

        Output: the centroid array (float32), one row per cluster.
        '''
        while True:

            for row in range(self.rows):
                self.distances[row] = 1 - self.iou(self.boxes[row], self.centroids)

            nearest_centroids = np.argmin(self.distances, axis=1)

            if (self.last_centroids == nearest_centroids).all():
                break

            for cluster in range(self.k):
                members = self.boxes[nearest_centroids == cluster]
                # An empty cluster keeps its previous centroid. Averaging an empty
                # selection would yield NaN, and argmin would then pick the NaN
                # column for every box, so the assignments could oscillate forever.
                if members.shape[0] == 0:
                    continue
                self.centroids[cluster] = np.mean(members, axis=0)

            self.last_centroids = nearest_centroids

        return self.centroids

### DataLoader

In [None]:
class Load_Dataset(Dataset):
    
    def __init__(self, resized_img_size, 
                 k=5, 
                 classes=classes,   
                 list_images=list_images,
                 list_annotations=list_annotations,
                 total_images=total_images,
                 )