In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import _init_paths
# --------------------------------------------------------
# Copyright (c) 2019 University of Twente.
# Licensed under The MIT License [see LICENSE for details]
# Written by Ye Lyu.
# --------------------------------------------------------

import datasets
import datasets.imagenet
import os, sys
from datasets.imdb import imdb
import xml.dom.minidom as minidom
import xml.etree.ElementTree as ET
import numpy as np
import numpy.random as npr
import scipy.sparse
import scipy.io as sio
import subprocess
import pdb
import cPickle
import random

try:
    xrange          # Python 2
except NameError:
    xrange = range  # Python 3

In [None]:
from __future__ import print_function
# --------------------------------------------------------
# Copyright (c) 2019 University of Twente.
# Licensed under The MIT License [see LICENSE for details]
# Written by Ye Lyu.
# --------------------------------------------------------

import os, sys
import pdb
import pickle
import random
import subprocess
import xml.dom.minidom as minidom
import xml.etree.ElementTree as ET

import numpy as np
import scipy.io as sio
import scipy.sparse
from PIL import Image

import datasets
import datasets.imagenet
from datasets.imdb import imdb
try:
    xrange          # Python 2
except NameError:
    xrange = range  # Python 3

class VisDroneDETVID(imdb):
    """Image database merging the VisDrone2018 DET (image) and VID (video) sets.

    Every image is identified by its file path.  On construction the raw
    comma-separated annotation files are converted into per-image XML files
    stored in ``my_annotations`` folders next to the raw ``annotations``
    folders; ``_load_annotation`` reads those XML files.
    """

    def __init__(self, image_set, devkit_path, data_path):
        """
        Build the image index and make sure the XML annotations exist.

        image_set   -- suffix of the dataset folders (e.g. 'train').
        devkit_path -- stored on the instance; not read by this class.
        data_path   -- folder holding the 'VisDrone2018-DET-*' and
                       'VisDrone2018-VID-*' folders.

        Raises AssertionError when ``data_path`` does not exist.
        """
        imdb.__init__(self, 'VisDroneDETVID_'+image_set)
        self._image_set = image_set
        self._devkit_path = devkit_path
        self._data_path = data_path
        self.DET_folder_prefix = 'VisDrone2018-DET-'
        self.VID_folder_prefix = 'VisDrone2018-VID-'

        # Check the root first: otherwise a wrong path only surfaces as an
        # OSError from os.listdir, or os.makedirs silently creates stray folders.
        #assert os.path.exists(self._devkit_path), 'Devkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path)

        # There are 10 object classes in the VisDrone2018 dataset.
        # _load_annotation uses the raw category id of an object directly as its
        # class index, so the names must be listed in category-id order
        # (1 pedestrian, 2 people, 3 bicycle, 4 car, 5 van, 6 truck, 7 tricycle,
        # 8 awning-tricycle, 9 bus, 10 motor in the VisDrone annotation format).
        self._classes = ('__background__',
                         'pedestrian',
                         'person',
                         'bicycle',
                         'car',
                         'van',
                         'truck',
                         'tricycle',
                         'awning-tricycle',
                         'bus',
                         'motor')
        # Not read anywhere in this class; kept unchanged for outside users.
        # NOTE(review): 12 entries for 11 class names - presumably the raw
        # category ids 0-11; confirm.
        self._ids = (0,1,2,3,4,5,6,7,8,9,10,11)
        self._image_ext = ['.jpg']

        self._image_index = self._load_image_set_index()

        self._transform_det_annotation(
            det_ann_dir=os.path.join(data_path,self.DET_folder_prefix+self._image_set,'annotations'),
            output_ann_dir=os.path.join(data_path,self.DET_folder_prefix+self._image_set,'my_annotations'))

        self._transform_vid_annotation(
            vid_ann_dir=os.path.join(data_path,self.VID_folder_prefix+self._image_set,'annotations'),
            output_ann_dir=os.path.join(data_path,self.VID_folder_prefix+self._image_set,'my_annotations'))

        # Default to roidb handler
        self._roidb_handler = self.gt_roidb

        # Specific config options
        self.config = {'cleanup'  : True,
                       'use_salt' : True,
                       'top_k'    : 2000}

    def image_path_at(self, i):
        """
        Return the path to image i in the image sequence.
        """
        return self.image_path_from_index(self._image_index[i])

    def image_id_at(self, i):
        """
        Return the identifier of image i in the image sequence.
        The identifier is the entry of the image index, i.e. the image path.
        """
        return self._image_index[i]

    def image_path_from_index(self, index):
        """
        Return the image path for an image "index" identifier.
        The identifier already is the path, so it is only checked for existence.
        """
        image_path = index
        assert os.path.exists(image_path), 'path does not exist: {}'.format(image_path)
        return image_path

    @staticmethod
    def _write_annotation_xml(fname, objects):
        """
        Write one pretty-printed XML annotation file.

        objects -- list of (x1, y1, x2, y2, category, track_id) tuples of
                   strings; a track_id of None omits the 'trackid' element.
        """
        ann = ET.Element('annotation')
        for x1, y1, x2, y2, cat, t_id in objects:
            obj = ET.SubElement(ann, 'object')
            if t_id is not None:
                ET.SubElement(obj, 'trackid').text = t_id
            ET.SubElement(obj, 'category').text = cat
            bndbox = ET.SubElement(obj, 'bndbox')
            ET.SubElement(bndbox, 'xmax').text = x2
            ET.SubElement(bndbox, 'xmin').text = x1
            ET.SubElement(bndbox, 'ymax').text = y2
            ET.SubElement(bndbox, 'ymin').text = y1

        # Serialise before opening the target so that a failure while building
        # the document cannot leave an empty file that later runs would skip.
        myann = minidom.parseString(ET.tostring(ann)).toprettyxml()
        with open(fname,'w') as f_obj:
            f_obj.write(myann)

    def _transform_det_annotation(self, det_ann_dir, output_ann_dir):
        """
        Convert every DET annotation txt file into an XML file.

        Each line holds x, y, w, h, score, category, truncation, occlusion
        (extra trailing fields are ignored).  Only lines whose score is '1' are
        kept; boxes are stored as xmin/ymin/xmax/ymax.  Existing XML files are
        left untouched, so the conversion only runs once per file.
        """
        if not os.path.exists(output_ann_dir):
            print('Create ann files for DET')
            os.makedirs(output_ann_dir)
            assert os.path.exists(output_ann_dir),'Cannot create directory:%s'%(output_ann_dir)

        for det_ann in os.listdir(det_ann_dir):
            stem, ext = os.path.splitext(det_ann)
            if ext.lower() != '.txt':
                continue
            fname = os.path.join(output_ann_dir, stem+'.xml')
            if os.path.exists(fname):
                continue
            print('Create ann file for: %s'%(det_ann))
            objects = []
            with open(os.path.join(det_ann_dir,det_ann)) as f:
                for item in f:
                    ents = [e.strip() for e in item.split(',')]
                    if ents == ['']:
                        # Blank line.
                        continue
                    if len(ents) < 8:
                        # Report and skip a malformed line instead of crashing.
                        print(ents)
                        continue
                    b_x,b_y,b_w,b_h,score,cat,trunc,occl = ents[:8]
                    if score=='1':
                        x2 = str(float(b_x)+float(b_w))
                        y2 = str(float(b_y)+float(b_h))
                        objects.append((b_x,b_y,x2,y2,cat,None))
            self._write_annotation_xml(fname, objects)

    def _transform_vid_annotation(self, vid_ann_dir, output_ann_dir):
        """
        Convert every VID annotation txt file into one XML file per frame.

        Each line holds frame id, track id, x, y, w, h, score, category,
        truncation, occlusion.  The XML files of a video go into a folder named
        after the txt file and are called '%07d.xml' % frame_id.  A video whose
        output folder already exists is skipped entirely.
        """
        for vid_ann in os.listdir(vid_ann_dir):
            stem, ext = os.path.splitext(vid_ann)
            if ext.lower() != '.txt':
                continue
            output_vid_ann_dir = os.path.join(output_ann_dir, stem)
            if os.path.exists(output_vid_ann_dir):
                continue
            print('Create ann files for vid:%s'%(vid_ann))

            # Parse everything before creating the folder: a parse error must
            # not leave an empty folder behind that makes later runs skip the video.
            f_gts = {}
            with open(os.path.join(vid_ann_dir,vid_ann)) as f:
                for item in f:
                    ents = [e.strip() for e in item.split(',')]
                    if ents == ['']:
                        continue
                    if len(ents) < 10:
                        print(ents)
                        continue
                    f_id,t_id,b_x,b_y,b_w,b_h,score,cat,trunc,occl = ents[:10]
                    # Register the frame even when the entry is filtered out, so
                    # that a frame with no kept object still gets an (empty)
                    # XML file, as the DET conversion does.
                    frame_objs = f_gts.setdefault(int(f_id), [])
                    if score=='1':
                        x2 = str(float(b_x)+float(b_w))
                        y2 = str(float(b_y)+float(b_h))
                        frame_objs.append((b_x,b_y,x2,y2,cat,t_id))

            os.makedirs(output_vid_ann_dir)
            assert os.path.exists(output_vid_ann_dir),'Cannot create directory:%s'%(output_vid_ann_dir)
            for f_id, frame_objs in f_gts.items():
                fname = os.path.join(output_vid_ann_dir, '%07d.xml'%f_id)
                self._write_annotation_xml(fname, frame_objs)

    def _load_image_set_index(self):
        """
        Load image set index for both image and video dataset.

        For the 'train' set the result is every file of the DET image folder
        followed by up to 20 evenly spaced frames of every VID sequence; for
        any other set it is an empty list.
        """
        num_img_per_vid = 20
        if self._image_set != 'train':
            image_indexes = []
            print('Total number of video images is:%d' % (len(image_indexes)))
            return image_indexes

        # First, load video data.
        # NOTE: os.listdir order is kept as-is (not sorted) because the cached
        # roidb written by gt_roidb is matched to this index by position.
        VID_sequence_folder = os.path.join(self._data_path, self.VID_folder_prefix+'train','sequences')
        dir_indexes = [os.path.join(VID_sequence_folder,d)
                       for d in os.listdir(VID_sequence_folder)
                       if os.path.isdir(os.path.join(VID_sequence_folder,d))]

        VID_indexes = []
        for d in dir_indexes:
            n_files = len(os.listdir(d))
            # A gap of at least 1 keeps short sequences (fewer frames than
            # num_img_per_vid) from yielding the first frame over and over.
            gap = max(1, n_files // num_img_per_vid)
            for i in range(num_img_per_vid):
                frame_no = gap*i+1
                if frame_no > n_files:
                    break
                VID_indexes.append(os.path.join(d,'%07d.jpg'%(frame_no)))
                assert os.path.exists(VID_indexes[-1]),VID_indexes[-1]+' does not exists.'

        # Second, load image data.
        DET_folder = os.path.join(self._data_path, self.DET_folder_prefix+'train','images')
        DET_indexes = [os.path.join(DET_folder, fname) for fname in os.listdir(DET_folder)]

        image_indexes = DET_indexes+VID_indexes
        print('Total number of DET images is:%d'%(len(DET_indexes)))
        print('Total number of VID images is:%d'%(len(VID_indexes)))
        return image_indexes

    def gt_roidb(self):
        """
        Return the database of ground-truth regions of interest.
        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
        print(cache_file)
        if os.path.exists(cache_file):
            # NOTE: pickle.load executes code stored in the file; only use
            # cache files written by this function.
            with open(cache_file, 'rb') as fid:
                roidb = pickle.load(fid)
            print('{} gt roidb loaded from {}'.format(self.name, cache_file))
            return roidb

        print('len(self.image_index))', len(self.image_index))
        gt_roidb = [self._load_annotation(index)
                    for index in self.image_index]
        with open(cache_file, 'wb') as fid:
            pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
        print('wrote gt roidb to {}'.format(cache_file))
        return gt_roidb

    def _annotation_path_from_index(self, index):
        """
        Map an image path to the path of its generated XML annotation.

        '<...>/images/<name>.<ext>'          -> '<...>/my_annotations/<name>.xml'
        '<...>/sequences/<vid>/<name>.<ext>' -> '<...>/my_annotations/<vid>/<name>.xml'
        Only the folder at that exact position is swapped, so other parts of
        the path that happen to contain 'images', 'sequences' or 'jpg' are left
        alone.  Any other layout falls back to plain substring replacement.
        """
        stem = os.path.splitext(index)[0]
        parts = stem.split(os.sep)
        if len(parts) >= 3 and parts[-3] == 'sequences':
            parts[-3] = 'my_annotations'
        elif len(parts) >= 2 and parts[-2] == 'images':
            parts[-2] = 'my_annotations'
        else:
            filename = index.replace('images','my_annotations')
            filename = filename.replace('sequences','my_annotations')
            return filename.replace('jpg','xml')
        return os.sep.join(parts) + '.xml'

    def _load_annotation(self, index):
        """
        Load the bounding boxes of one image from its generated XML file.

        index -- image path as stored in the image index.

        Returns a dict with 'boxes' (num_objs x 4 uint16, x1 y1 x2 y2 clamped
        to the image), 'gt_classes' (int32), 'gt_overlaps' (sparse one-hot
        matrix over the classes) and 'flipped' (always False).
        Raises AssertionError when the XML file does not exist.
        """
        filename = self._annotation_path_from_index(index)

        assert os.path.exists(filename),'%s'%(filename)

        def get_data_from_tag(node, tag):
            return node.getElementsByTagName(tag)[0].childNodes[0].data

        with open(filename) as f:
            data = minidom.parseString(f.read())

        objs = data.getElementsByTagName('object')
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        # The image is opened only for its size; close the file right away.
        with Image.open(index) as img:
            width, height = img.size

        # Load object bounding boxes, clamping x,y to the image range.
        for ix, obj in enumerate(objs):
            x1 = float(get_data_from_tag(obj, 'xmin'))
            y1 = float(get_data_from_tag(obj, 'ymin'))
            x2 = float(get_data_from_tag(obj, 'xmax'))
            y2 = float(get_data_from_tag(obj, 'ymax'))
            # The raw category id doubles as the class index.
            cls = int(get_data_from_tag(obj, "category"))
            x1 = min(max(0, x1), width - 1)
            x2 = min(max(0, x2), width - 1)
            y1 = min(max(0, y1), height - 1)
            y2 = min(max(0, y2), height - 1)
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)

        return {'boxes' : boxes,
                'gt_classes': gt_classes,
                'gt_overlaps' : overlaps,
                'flipped' : False}

In [None]:
# Build the combined DET + VID training database rooted at data/VisDrone2018.
ds = VisDroneDETVID(image_set='train', devkit_path=None, data_path='data/VisDrone2018')

In [None]:
# Scan the dataset folders again for the image paths
# (the constructor already ran this scan once and stored the result).
img_ids = ds._load_image_set_index()

In [None]:
# Take the first entry of the index; each entry is an image file path.
index = img_ids[0]
print(index)

In [None]:
# Read the generated XML annotation of that image into a dict with
# 'boxes', 'gt_classes', 'gt_overlaps' and 'flipped'.
ann = ds._load_annotation(index)

In [None]:
import PIL
from matplotlib import pyplot as plt
def showGTBox(img_id, imdb, show_class=True, dpi = 200):
    """
    Draw the ground-truth boxes of one image on a borderless figure.

    img_id     -- position of the image in imdb.image_index.
    imdb       -- database providing image_index, classes and _load_annotation.
    show_class -- when True, label every box with its class name.
    dpi        -- divisor turning the pixel size of the image into the
                  figure size in inches.
    """
    img_path = imdb.image_index[img_id]
    assert os.path.exists(img_path), img_path+' does not exist.'

    annotation = imdb._load_annotation(img_path)
    gt_boxes = annotation['boxes']
    gt_classes = annotation['gt_classes']

    # Figure that is filled entirely by the image, without axes decoration.
    pixels = np.array(PIL.Image.open(img_path))
    img_h, img_w = pixels.shape[0], pixels.shape[1]
    fig = plt.figure(frameon=False)
    fig.set_size_inches(img_w / dpi, img_h / dpi)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.axis('off')
    fig.add_axes(ax)
    ax.imshow(pixels)

    # One green outline (and optional label) per ground-truth object.
    label_style = dict(facecolor='g', alpha=0.4, pad=0, edgecolor='none')
    for box, cls in zip(gt_boxes, gt_classes):
        left, top, right, bottom = box[0], box[1], box[2], box[3]
        outline = plt.Rectangle((left, top),
                                right - left,
                                bottom - top,
                                fill=False, edgecolor='g',
                                linewidth=3.0, alpha=0.8)
        ax.add_patch(outline)
        if not show_class:
            continue
        ax.text(left, top - 2,
                '%s %f'%(imdb.classes[cls], 1.0),
                fontsize=10,
                bbox=label_style,
                color='white')
    

In [None]:
# Show the ground truth of entry 4300 of ds.image_index; the figure size is the
# image size divided by dpi, so a small dpi gives a large figure.
showGTBox(img_id=4300,imdb=ds,dpi=40)