In [8]:
# Read anno_bbox.mat with SciPy's MATLAB-file loader and keep the result in `anno_box`.
# NOTE(review): `anno_box` is not referenced by any later cell visible here
# (ConvertMat2Json loads the same file on its own) - presumably this cell was
# only used for interactive inspection; confirm before relying on it.
import scipy.io
anno_box = scipy.io.loadmat('/home/yuxuan/gpnn/tmp/hico/hico_20160224_det/anno_bbox.mat')

In [57]:
import json
import os

import numpy as np
import scipy.io as scio
from tqdm import tqdm

def write(file_name, data, mode='wb'):
    """Write ``data`` to ``file_name``.

    Args:
        file_name: Path of the file to open.
        data: Payload handed to the file object's ``write`` method; it must
            match ``mode`` (bytes for binary modes, str for text modes).
        mode: Mode string passed to ``open``; defaults to binary write.
    """
    with open(file_name, mode) as out_file:
        out_file.write(data)
        
class NumpyAwareJSONEncoder(json.JSONEncoder):
    """``json.JSONEncoder`` that also serialises NumPy arrays and NumPy scalars.

    Pass it as ``cls=NumpyAwareJSONEncoder`` to ``json.dumps`` / ``json.dump``.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        Args:
            obj: A value the stock encoder could not serialise.

        Returns:
            A (possibly nested) list for ``np.ndarray`` of any rank, including
            0-d arrays, which come back as a plain scalar; ``bool`` for
            ``np.bool_``; ``int`` for any ``np.integer``; ``float`` for any
            ``np.floating``.

        Raises:
            TypeError: Raised by the base class for any other type.
        """
        if isinstance(obj, np.ndarray):
            # tolist() handles every rank (0-d included) and converts the
            # elements to built-in Python scalars along the way.
            return obj.tolist()
        if isinstance(obj, np.bool_):
            return bool(obj)
        # The abstract scalar bases cover every width (int8..int64,
        # uint8..uint64, float16..longdouble) instead of a hand-kept list.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return super().default(obj)

In [70]:
class ConvertMat2Json():
    """Convert the ``anno.mat`` / ``anno_bbox.mat`` annotation files to JSON.

    The two MATLAB files are loaded once in ``__init__``; ``convert`` then
    writes ``anno_list.json``, ``hoi_list.json``, ``object_list.json`` and
    ``verb_list.json``. All ids are emitted as 1-based, zero-padded
    three-character strings (``'001'``, ``'002'``, ...).
    """

    # Defaults reproduce the locations that used to be hard-coded, so calling
    # ConvertMat2Json() and convert() without arguments behaves as before.
    DEFAULT_ANNO_PATH = '/home/yuxuan/gpnn/tmp/hico/hico_20160224_det/anno.mat'
    DEFAULT_ANNO_BBOX_PATH = '/home/yuxuan/gpnn/tmp/hico/hico_20160224_det/anno_bbox.mat'
    DEFAULT_OUT_DIR = '/home/yuxuan'

    def __init__(self, anno_path=DEFAULT_ANNO_PATH, anno_bbox_path=DEFAULT_ANNO_BBOX_PATH):
        """Load both annotation files.

        Args:
            anno_path: Path to ``anno.mat``.
            anno_bbox_path: Path to ``anno_bbox.mat``.
        """
        self.anno = scio.loadmat(anno_path)
        self.anno_bbox = scio.loadmat(anno_bbox_path)

    def create_hoi_list(self):
        """Build one ``{'id', 'object', 'verb'}`` dict per row of ``list_action``.

        Returns:
            A list whose ``i``-th entry has id ``str(i+1).zfill(3)`` and takes
            ``object`` / ``verb`` from fields 0 / 1 of that row.
        """
        actions = self.anno['list_action']
        return [
            {
                'id': str(i + 1).zfill(3),
                'object': actions[i, 0][0][0],
                'verb': actions[i, 0][1][0],
            }
            for i in range(actions.shape[0])
        ]

    def get_image_size(self, i, subset):
        """Return ``[H, W, C]`` for sample ``i`` of ``subset`` as Python ints.

        Args:
            i: Column index into ``bbox_<subset>``.
            subset: Subset name used to build the key, e.g. ``'train'``.
        """
        # Field 1 of a sample record is the size record; it is stored in
        # (W, H, C) order and returned here as (H, W, C).
        size = self.anno_bbox[f'bbox_{subset}'][0, i][1][0, 0]
        W = size[0][0, 0]
        H = size[1][0, 0]
        C = size[2][0, 0]
        return [int(v) for v in (H, W, C)]

    @staticmethod
    def _convert_boxes(boxes):
        """Turn a ``(1, n)`` array of box records into ``n`` four-int lists."""
        # Fields are read in the order 0, 2, 1, 3 and each value is lowered by
        # one. NOTE(review): presumably the stored order is (x1, x2, y1, y2)
        # with 1-based pixels, giving 0-based (x1, y1, x2, y2) - confirm
        # against the dataset README.
        return [
            [int(boxes[0, b][k][0, 0] - 1) for k in (0, 2, 1, 3)]
            for b in range(boxes.shape[1])
        ]

    def get_hoi_bboxes(self, i, subset):
        """Return the per-HOI box annotations of sample ``i`` in ``subset``.

        Args:
            i: Column index into ``bbox_<subset>``.
            subset: Subset name used to build the key, e.g. ``'train'``.

        Returns:
            A list of dicts with keys ``id``, ``human_bboxes``,
            ``object_bboxes``, ``connections`` (stored values minus one, as
            nested lists) and ``invis``.
        """
        # Field 2 of a sample record holds the HOI records as a (1, n) array.
        hoi_entries = self.anno_bbox[f'bbox_{subset}'][0, i][2]
        hois = []
        for j in range(hoi_entries.shape[1]):
            hoi_data = hoi_entries[0, j]
            hois.append({
                'id': str(hoi_data[0][0, 0]).zfill(3),
                'human_bboxes': self._convert_boxes(hoi_data[1]),
                'object_bboxes': self._convert_boxes(hoi_data[2]),
                'connections': (hoi_data[3] - 1).tolist(),
                'invis': int(hoi_data[4][0, 0]),
            })
        return hois

    @staticmethod
    def _hoi_ids_with_label(labels, value):
        """Return zero-padded 1-based ids of the rows of ``labels`` equal to ``value``."""
        return [
            str(k[0] + 1).zfill(3)
            for k in np.argwhere(labels == value).tolist()
        ]

    def create_anno_list(self):
        """Build the per-image annotation dicts for ``train`` then ``test``.

        Returns:
            A list of dicts with keys ``global_id``, ``image_path_postfix``,
            ``image_size``, ``hois``, ``pos_hoi_ids`` (label ``1``) and
            ``neg_hoi_ids`` (label ``-1``).

        Raises:
            AssertionError: If an image file name does not end in ``.jpg``.
        """
        anno_list = []
        for subset in ['train', 'test']:
            print(f'Adding {subset} data to anno list ...')
            labels = self.anno[f'anno_{subset}']
            file_names = self.anno[f'list_{subset}']
            for i in tqdm(range(labels.shape[1])):
                image_jpg = file_names[i][0][0]

                # Raised explicitly (rather than via `assert`) so the check is
                # not stripped when Python runs with -O.
                if not image_jpg.endswith('.jpg'):
                    raise AssertionError('Image extension is not .jpg')

                image_labels = labels[:, i]
                anno_list.append({
                    'global_id': image_jpg[:-4],
                    'image_path_postfix': f'{subset}2015/{image_jpg}',
                    'image_size': self.get_image_size(i, subset),
                    'hois': self.get_hoi_bboxes(i, subset),
                    'pos_hoi_ids': self._hoi_ids_with_label(image_labels, 1),
                    'neg_hoi_ids': self._hoi_ids_with_label(image_labels, -1),
                })

        return anno_list

    @staticmethod
    def _name_list(names):
        """Return ``[{'id', 'name'}, ...]`` for the sorted unique ``names``."""
        return [
            {'id': str(i + 1).zfill(3), 'name': name}
            for i, name in enumerate(sorted(set(names)))
        ]

    @staticmethod
    def _dump_json(obj, out_dir, file_name):
        """Serialise ``obj`` as indented, key-sorted JSON into ``out_dir/file_name``."""
        data = json.dumps(
            obj, cls=NumpyAwareJSONEncoder, sort_keys=True, indent=4).encode()
        write(os.path.join(out_dir, file_name), data)

    def convert(self, out_dir=DEFAULT_OUT_DIR):
        """Write the four JSON files into ``out_dir``.

        Args:
            out_dir: Directory that receives ``anno_list.json``,
                ``hoi_list.json``, ``object_list.json`` and ``verb_list.json``.
        """
        print('Creating anno list ...')
        self._dump_json(self.create_anno_list(), out_dir, 'anno_list.json')

        print('Creating hoi list ...')
        hoi_list = self.create_hoi_list()
        self._dump_json(hoi_list, out_dir, 'hoi_list.json')

        print('Creating object list ...')
        object_list = self._name_list(hoi['object'] for hoi in hoi_list)
        self._dump_json(object_list, out_dir, 'object_list.json')

        print('Creating verb list ...')
        verb_list = self._name_list(hoi['verb'] for hoi in hoi_list)
        self._dump_json(verb_list, out_dir, 'verb_list.json')

In [71]:
# Build the converter (its __init__ reads anno.mat and anno_bbox.mat), then
# write anno_list.json, hoi_list.json, object_list.json and verb_list.json.
converter = ConvertMat2Json()
converter.convert()

  4%|▍         | 1523/38118 [00:00<00:04, 7671.08it/s]

Creating anno list ...
Adding train data to anno list ...


100%|██████████| 38118/38118 [00:05<00:00, 6637.03it/s]
 15%|█▌        | 1471/9658 [00:00<00:01, 7364.72it/s]

Adding test data to anno list ...


100%|██████████| 9658/9658 [00:01<00:00, 4835.88it/s]


Creating hoi list ...
Creating object list ...
Creating verb list ...
