In [1]:
from __future__ import division
import argparse
import os
import os.path as osp
import warnings
import xml.dom.minidom as minidom
import scipy.io as sio
import numpy as np
import json
import PIL
from PIL import Image
from tqdm import tqdm

In [2]:
# Root of the ImageNet data tree; a relative path, so it is resolved against
# the working directory the notebook is run from.
imagenetHome = '../data/imagenet'
# The devkit directory sits directly under the data root.
devkit_path = os.path.join(imagenetHome, 'devkit')

In [3]:
# Read the two synset tables shipped in the devkit: meta_det.mat feeds the
# "*_image" names below, meta_vid.mat feeds the video ones.
synsets_image = sio.loadmat(osp.join(devkit_path, 'data', 'meta_det.mat'))
synsets_video = sio.loadmat(osp.join(devkit_path, 'data', 'meta_vid.mat'))

# Index 0 is reserved for the background class in every table. For each
# synset row, field 2 is stored as the class name and field 1 as the WNID.
classes_image = ('__background__',) + tuple(
    synsets_image['synsets'][0][k][2][0] for k in range(200))
wnid_image = (0,) + tuple(
    synsets_image['synsets'][0][k][1][0] for k in range(200))

classes = ('__background__',) + tuple(
    synsets_video['synsets'][0][k][2][0] for k in range(30))
wnid = (0,) + tuple(
    synsets_video['synsets'][0][k][1][0] for k in range(30))

# Lookup tables: WNID / class name -> integer label (position in the tuple).
wnid_to_ind_image = {key: ind for ind, key in enumerate(wnid_image)}
class_to_ind_image = {key: ind for ind, key in enumerate(classes_image)}

wnid_to_ind = {key: ind for ind, key in enumerate(wnid)}
class_to_ind = {key: ind for ind, key in enumerate(classes)}

#check for valid intersection between video and image classes
# Entry k is 1 when image class k also appears among the video WNIDs;
# the background slot (index 0) is always left at 0.
valid_image_flag = [0] + [
    1 if wnid_image[k] in wnid_to_ind else 0 for k in range(1, 201)]

In [4]:
def load_imagenet_annotation(index_image, img_prefix):
    """
    Load the image size and bounding-box annotations of one imagenet image.

    The image is opened only to read its size. The annotation is parsed from
    the XML file whose relative path is obtained from ``index_image`` by
    replacing 'Data' with 'Annotations' and '.JPEG' with '.xml'. Objects whose
    <name> (lower-cased and stripped) is not a key of the module-level
    ``wnid_to_ind`` are dropped.

    Args:
        index_image(str): image path relative to ``img_prefix``.
        img_prefix(str): directory joined in front of both the image path
            and the derived annotation path.

    Returns:
        dict: {'filename': index_image, 'width': int, 'height': int,
               'ann': {'bboxes': list of [x1, y1, x2, y2],
                       'labels': list of int}}.
        Box coordinates are clamped to [0, width-1] / [0, height-1].

    Raises:
        AssertionError: if the image file or the annotation file is missing.
    """
    # Get image infos
    img_path = osp.join(img_prefix, index_image)
    assert os.path.exists(img_path),'%s'%(img_path)
    with Image.open(img_path) as img:
        width, height = img.size
    ##########################################
    # Get ann infos
    ann_index = index_image.replace('Data','Annotations').replace('.JPEG','.xml')
    ann_path = osp.join(img_prefix, ann_index)
    assert os.path.exists(ann_path),'%s'%(ann_path)

    def get_data_from_tag(node, tag):
        # Text content of the first <tag> element found below ``node``.
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    def clamp(raw, upper):
        # Force a coordinate into the closed range [0, upper].
        return min(max(float(raw), 0), upper)

    # Let the XML parser open the file itself: it reads bytes and honours the
    # encoding declared in the document instead of the locale default.
    data = minidom.parse(ann_path)

    # Keep only the objects that belong to the video synsets, resolving each
    # name to its label once.
    labelled_objs = []
    for obj in data.getElementsByTagName('object'):
        obj_wnid = str(get_data_from_tag(obj, "name")).lower().strip()
        if obj_wnid in wnid_to_ind:
            labelled_objs.append((obj, wnid_to_ind[obj_wnid]))
    num_objs = len(labelled_objs)

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int64)

    # Load object bounding boxes into a data frame.
    for ix, (obj, cls) in enumerate(labelled_objs):
        x1 = clamp(get_data_from_tag(obj, 'xmin'), width-1)
        y1 = clamp(get_data_from_tag(obj, 'ymin'), height-1)
        x2 = clamp(get_data_from_tag(obj, 'xmax'), width-1)
        y2 = clamp(get_data_from_tag(obj, 'ymax'), height-1)
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls

    rtv = {
        'filename':index_image,
        'width':width,
        'height':height,
        'ann': {
            'bboxes':boxes.tolist(),
            'labels':gt_classes.tolist(),
            }
        }
    return rtv

In [5]:
def getCustomAnnByImagenetIndex(index,img_prefix):
    '''
    Build the custom-style annotation entry of a single imagenet image.

    This is a thin wrapper that forwards both arguments to
    load_imagenet_annotation and hands back its result unchanged.

    One entry of the custom annotation list looks like:
        {
            'filename': 'a.jpg',
            'width': 1280,
            'height': 720,
            'ann': {
                'bboxes': n boxes, each [x1, y1, x2, y2],
                'labels': n integer labels,
            }
        }
    (The original notes for this format also list optional 'bboxes_ignore'
    and 'labels_ignore' fields under 'ann'; they are not produced here.)

    Args:
        index(str): relative file path.
        img_prefix(str): directory the relative path is joined onto.

    Returns:
        dict: the annotation entry for the image.
    '''
    annotation = load_imagenet_annotation(index, img_prefix)
    return annotation

In [6]:
def ImageSet2CustomAnn_DETVID(imagenetHome, imagesetPath, cumstomStyleAnnPath):
    '''Convert Imagenet DETVID annotations to custom style annotations.

       The imagenet dataset is configured by 1 txt file in "ImageSets"
       directory (containing image indexes), while custom style annotations
       have all anns in 1 file.

    Args:
        imagenetHome(str): directory the image indexes are relative to; it is
            passed on as the image prefix when loading each annotation.
        imagesetPath(str): imagenet imageset path.
        cumstomStyleAnnPath(str): the path to save annotations, must end
            with '.json'.

    Raises:
        AssertionError: if cumstomStyleAnnPath does not end with '.json'.
    '''
    assert cumstomStyleAnnPath.endswith('.json')
    file_indexes = []
    with open(imagesetPath,'r') as fr:
        for line in fr:
            items = line.split()
            # Blank / whitespace-only lines carry no index; skip them instead
            # of failing on items[0].
            if not items:
                continue
            # Only the first column is used; any further columns are ignored.
            file_indexes.append(items[0].replace('data/imagenet/ILSVRC/','')+'.JPEG')
    print('total number of files are %d.'%(len(file_indexes)))

    # One annotation entry per image index, in file order.
    ann_list = []
    for index in tqdm(file_indexes):
        ann_list.append(getCustomAnnByImagenetIndex(index,imagenetHome))

    with open(cumstomStyleAnnPath,'w') as fw:
        json.dump(ann_list, fw, indent=2)

In [7]:
if __name__ == '__main__':
    # NOTE(review): machine-specific absolute path; adjust before running elsewhere.
    imagesetPath = '/media/yelyu/18339a64-762e-4258-a609-c0851cd8163e/YeLyu/Work/FastVOD/data/imagenet/ILSVRC/ImageSets/trainr_DETVID.txt'
    # The annotations are written next to the image set, with a .json suffix.
    cumstomStyleAnnPath = imagesetPath.replace('.txt', '.json')
    ImageSet2CustomAnn_DETVID(
        imagenetHome=imagenetHome,
        imagesetPath=imagesetPath,
        cumstomStyleAnnPath=cumstomStyleAnnPath,
    )

  0%|          | 3/111473 [00:00<1:07:52, 27.37it/s]

total number of files are 111473.


  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
  "Skipping tag %s" % (size, len(data), tag))
100%|██████████| 111473/111473 [50:59<00:00, 36.43it/s]  
