In [3]:
import numpy as np
import sys
import os
import math

In [4]:
def labelread_sequence(file_path):
    '''
    Parse a whitespace-separated label file into a list of nested dicts.

    Each non-blank line is split on whitespace and its tokens are mapped, in
    order, onto: type, truncated, occluded, alpha, bbox (left, top, right,
    bottom), dimensions (height, width, length), location (x, y, z),
    rotation_y, score. Fields for which a line has no remaining tokens are
    simply absent from that line's dict.

    NOTE(review): the field layout here is the same as in ``labelread``; the
    name suggests a per-sequence label format -- confirm whether a different
    layout was intended.

    :param file_path: path of the label text file to read.
    :return: list with one dict per non-blank line.
    '''
    bbox = ('bbox', ['left', 'top', 'right', 'bottom'])
    dimensions = ('dimensions', ['height', 'width', 'length'])
    location = ('location', ['x', 'y', 'z'])
    keys = ['type', 'truncated', 'occluded', 'alpha', bbox,
            dimensions, location, 'rotation_y', 'score']
    labels = list()
    # The context manager closes the handle even if parsing raises.
    with open(file_path, 'r') as label_file:
        for line in label_file:
            vals = line.split()
            if not vals:
                # Blank line (e.g. trailing newline at end of file): nothing to parse.
                continue
            l, _ = vals_to_dict(vals, keys)
            labels.append(l)
    return labels

def vals_to_dict(vals, keys, vals_n=0):
    '''
    Map a flat token list onto a (possibly nested) key layout.

    ``keys`` is a list whose entries are either a plain string (one token is
    consumed and stored under that name) or a ``(name, sub_keys)`` pair (the
    sub-keys are filled recursively and stored as a nested dict under
    ``name``). Tokens that can be converted with ``float()`` are stored as
    floats; all others are stored unchanged.

    Filling stops as soon as the tokens are exhausted, so trailing keys
    without a matching token are left out of the result. An empty ``vals``
    (or a ``vals_n`` already past the end) yields an empty dict.

    :param vals: sequence of tokens, typically the result of ``str.split()``.
    :param keys: key layout as described above.
    :param vals_n: index of the first token to consume.
    :return: tuple ``(out, vals_n)`` with the filled dict and the index of
        the first token that was not consumed.
    '''
    out = dict()
    for key in keys:
        # Checked before each key so that an exhausted (or empty) token list
        # ends the loop instead of raising IndexError.
        if vals_n >= len(vals):
            break
        if isinstance(key, str):
            token = vals[vals_n]
            try:
                data = float(token)
            except (TypeError, ValueError):
                # Not numeric (e.g. the object class name): keep the raw token.
                data = token
            key_name = key
            vals_n += 1
        else:
            key_name = key[0]
            data, vals_n = vals_to_dict(vals, key[1], vals_n)
        out[key_name] = data
    return out, vals_n

def labelread(file_path):
    '''
    Parse a whitespace-separated label file into a list of nested dicts.

    Each non-blank line is split on whitespace and its tokens are mapped, in
    order, onto: type, truncated, occluded, alpha, bbox (left, top, right,
    bottom), dimensions (height, width, length), location (x, y, z),
    rotation_y, score. Fields for which a line has no remaining tokens are
    simply absent from that line's dict.

    :param file_path: path of the label text file to read.
    :return: list with one dict per non-blank line.
    '''
    bbox = ('bbox', ['left', 'top', 'right', 'bottom'])
    dimensions = ('dimensions', ['height', 'width', 'length'])
    location = ('location', ['x', 'y', 'z'])
    keys = ['type', 'truncated', 'occluded', 'alpha', bbox,
            dimensions, location, 'rotation_y', 'score']
    labels = list()
    # The context manager closes the handle even if parsing raises.
    with open(file_path, 'r') as label_file:
        for line in label_file:
            vals = line.split()
            if not vals:
                # Blank line (e.g. trailing newline at end of file): nothing to parse.
                continue
            l, _ = vals_to_dict(vals, keys)
            labels.append(l)
    return labels

def calibread(file_path):
    '''
    Read a ``name: v0 v1 ...`` calibration file into a dict of matrices.

    Blank lines and lines starting with ``#`` are ignored. Every other line
    must contain exactly one ``:`` separating the entry name from its
    whitespace-separated numbers. Entries with 12 numbers are stored as a
    3x4 float64 array, entries with 9 numbers as a 3x3 float64 array.

    :param file_path: path of the calibration text file to read.
    :return: dict mapping entry name to its numpy array.
    :raises AssertionError: if a line does not contain exactly one ``:`` or
        does not hold 9 or 12 numbers.
    '''
    out = dict()
    # The context manager closes the handle even if a format check fails.
    with open(file_path, 'r') as calib_file:
        for line in calib_file:
            line = line.strip()
            if line == '' or line[0] == '#':
                continue
            val = line.split(':')
            assert len(val) == 2, 'Wrong file format, only one : per line!'
            key_name = val[0].strip()
            # split() without an argument collapses tabs and repeated spaces,
            # which split(' ') would turn into empty, unparsable tokens.
            val = np.asarray(val[-1].split(), dtype='f8')
            assert len(val) in [12, 9], "Wrong file format, wrong number of numbers!"
            if len(val) == 12:
                out[key_name] = val.reshape(3, 4)
            elif len(val) == 9:
                out[key_name] = val.reshape(3, 3)
    return out

In [5]:
# Dataset root. Set the KITTI_DATA_PATH environment variable to run the
# notebook on another machine; the original location stays the default.
kitti_data_path = os.environ.get(
    'KITTI_DATA_PATH', '/home/zijieguo/project/data_object_velodyne/')
# os.path.join works whether or not the root ends with a path separator.
label_path = os.path.join(kitti_data_path, 'training', 'label_2', '000001.txt')
calib_path = os.path.join(kitti_data_path, 'training', 'calib', '000001.txt')
labels = labelread(label_path)
calib = calibread(calib_path)

In [6]:
# Show the list of label dicts returned by labelread(label_path).
labels

[{'type': 'Truck',
  'truncated': 0.0,
  'occluded': 0.0,
  'alpha': -1.57,
  'bbox': {'left': 599.41, 'top': 156.4, 'right': 629.75, 'bottom': 189.25},
  'dimensions': {'height': 2.85, 'width': 2.63, 'length': 12.34},
  'location': {'x': 0.47, 'y': 1.49, 'z': 69.44},
  'rotation_y': -1.56},
 {'type': 'Car',
  'truncated': 0.0,
  'occluded': 0.0,
  'alpha': 1.85,
  'bbox': {'left': 387.63, 'top': 181.54, 'right': 423.81, 'bottom': 203.12},
  'dimensions': {'height': 1.67, 'width': 1.87, 'length': 3.69},
  'location': {'x': -16.53, 'y': 2.39, 'z': 58.49},
  'rotation_y': 1.57},
 {'type': 'Cyclist',
  'truncated': 0.0,
  'occluded': 3.0,
  'alpha': -1.65,
  'bbox': {'left': 676.6, 'top': 163.95, 'right': 688.98, 'bottom': 193.93},
  'dimensions': {'height': 1.86, 'width': 0.6, 'length': 2.02},
  'location': {'x': 4.59, 'y': 1.32, 'z': 45.84},
  'rotation_y': -1.55},
 {'type': 'DontCare',
  'truncated': -1.0,
  'occluded': -1.0,
  'alpha': -10.0,
  'bbox': {'left': 503.89, 'top': 169.71, 

In [7]:
# Show the dict of calibration matrices returned by calibread(calib_path).
calib

{'P0': array([[721.5377,   0.    , 609.5593,   0.    ],
        [  0.    , 721.5377, 172.854 ,   0.    ],
        [  0.    ,   0.    ,   1.    ,   0.    ]]),
 'P1': array([[ 721.5377,    0.    ,  609.5593, -387.5744],
        [   0.    ,  721.5377,  172.854 ,    0.    ],
        [   0.    ,    0.    ,    1.    ,    0.    ]]),
 'P2': array([[7.215377e+02, 0.000000e+00, 6.095593e+02, 4.485728e+01],
        [0.000000e+00, 7.215377e+02, 1.728540e+02, 2.163791e-01],
        [0.000000e+00, 0.000000e+00, 1.000000e+00, 2.745884e-03]]),
 'P3': array([[ 7.215377e+02,  0.000000e+00,  6.095593e+02, -3.395242e+02],
        [ 0.000000e+00,  7.215377e+02,  1.728540e+02,  2.199936e+00],
        [ 0.000000e+00,  0.000000e+00,  1.000000e+00,  2.729905e-03]]),
 'R0_rect': array([[ 0.9999239 ,  0.00983776, -0.00744505],
        [-0.0098698 ,  0.9999421 , -0.00427846],
        [ 0.00740253,  0.00435161,  0.9999631 ]]),
 'Tr_velo_to_cam': array([[ 7.533745e-03, -9.999714e-01, -6.166020e-04, -4.069766e-03],


In [8]:
# Turn every parsed label into a pair of drawable annotations:
#   'label_2D' - the image rectangle as a 4-corner integer polygon
#   'label_3D' - the eight corners of the oriented 3D box plus metadata
polys = list()
for bbox in labels:
    # ---- 2D part -------------------------------------------------------
    poly2d = {'class': bbox['type'], 'truncated': bbox['truncated']}
    rect = bbox['bbox']
    # Corner order: top-left, top-right, bottom-right, bottom-left.
    # The int32 dtype drops the fractional part of the coordinates.
    poly2d['poly'] = np.array(
        [[rect['left'], rect['top']],
         [rect['right'], rect['top']],
         [rect['right'], rect['bottom']],
         [rect['left'], rect['bottom']]],
        dtype='int32')

    # ---- 3D part -------------------------------------------------------
    location = np.asarray(
        [bbox['location'][axis] for axis in ('x', 'y', 'z')],
        dtype='float32')

    # Rotation by r_y about the y axis.
    r_y = bbox['rotation_y']
    cos_y, sin_y = math.cos(r_y), math.sin(r_y)
    Rmat = np.asarray([[cos_y, 0, sin_y],
                       [0, 1, 0],
                       [-sin_y, 0, cos_y]],
                      dtype='float32')

    dims = bbox['dimensions']
    length = dims['length']
    width = dims['width']
    height = dims['height']
    half_l = length / 2.0
    half_w = width / 2.0

    # Box corners relative to `location` before rotation: the first four
    # lie at y = 0, the last four at y = -height, in the same x/z order.
    local_corners = [
        (half_l, 0, half_w),
        (-half_l, 0, half_w),
        (-half_l, 0, -half_w),
        (half_l, 0, -half_w),
        (half_l, -height, half_w),
        (-half_l, -height, half_w),
        (-half_l, -height, -half_w),
        (half_l, -height, -half_w),
    ]
    # Rotate each corner on its own (one matrix-vector product per corner).
    p0, p1, p2, p3, p4, p5, p6, p7 = [
        np.dot(Rmat, np.asarray(corner, dtype='float32'))
        for corner in local_corners
    ]

    poly3d = {
        'class': bbox['type'],
        # `location` broadcasts over the 8 rotated corners -> array of shape (8, 3).
        'points': np.array(location + [p0, p1, p2, p3, p4, p5, p6, p7]),
        # Index sequences into 'points' (presumably polylines to draw, one
        # colour per entry of 'colors' -- confirm against the drawing code).
        'lines': [[0, 3, 7, 4, 0], [1, 2, 6, 5, 1],
                  [0, 1], [2, 3], [6, 7], [4, 5]],
        'colors': [[255, 0, 0], [0, 0, 255], [255, 0, 0],
                   [255, 0, 0], [255, 0, 0], [255, 0, 0]],
        # Note: the key is 'P0_mat' but the stored matrix is calib['P2'].
        'P0_mat': calib['P2'],
        'center': location,
        'l': length,
        'w': width,
        'h': height,
        'r_y': r_y,
    }

    poly = {'label_2D': poly2d, 'label_3D': poly3d}
    polys.append(poly)

In [9]:
# Collect, once per image id, every annotation whose 2D label is a 'Car'
# with truncation below 0.5.
examples = []
for img_id in ['0', '1']:
    for poly in polys:
        label_2d = poly['label_2D']
        if label_2d['truncated'] < 0.5 and label_2d['class'] == 'Car':
            # Shallow-copy before tagging: writing 'img_id' into the shared
            # `poly` dict would make every entry for this annotation report
            # the last image id ('1') and would also modify `polys` itself.
            # The nested 'label_2D' / 'label_3D' dicts are still shared.
            example = dict(poly)
            example['img_id'] = img_id
            examples.append(example)

num_examples = len(examples)


In [10]:
# Inspect the second entry of the selected examples.
examples[1]

{'label_2D': {'class': 'Car', 'truncated': 0.0, 'poly': array([[387, 181],
         [423, 181],
         [423, 203],
         [387, 203]], dtype=int32)},
 'label_3D': {'class': 'Car',
  'points': array([[-15.593532  ,   2.39      ,  56.64575   ],
         [-15.59647   ,   2.39      ,  60.335747  ],
         [-17.466469  ,   2.39      ,  60.334255  ],
         [-17.463531  ,   2.39      ,  56.644257  ],
         [-15.593532  ,   0.72000015,  56.64575   ],
         [-15.59647   ,   0.72000015,  60.335747  ],
         [-17.466469  ,   0.72000015,  60.334255  ],
         [-17.463531  ,   0.72000015,  56.644257  ]], dtype=float32),
  'lines': [[0, 3, 7, 4, 0], [1, 2, 6, 5, 1], [0, 1], [2, 3], [6, 7], [4, 5]],
  'colors': [[255, 0, 0],
   [0, 0, 255],
   [255, 0, 0],
   [255, 0, 0],
   [255, 0, 0],
   [255, 0, 0]],
  'P0_mat': array([[7.215377e+02, 0.000000e+00, 6.095593e+02, 4.485728e+01],
         [0.000000e+00, 7.215377e+02, 1.728540e+02, 2.163791e-01],
         [0.000000e+00, 0.000000e+0

In [12]:
# Work on the second selected example: fetch its 2D and 3D annotations.
selected = examples[1]
label_2D, label_3D = selected['label_2D'], selected['label_3D']

# The 2D polygon holds one (u, v) corner per row.
bbox = label_2D['poly']
u_min, v_min = bbox[0, 0], bbox[0, 1]  # row 0: (left, top)
u_max = bbox[1, 0]                     # row 1: right
v_max = bbox[2, 1]                     # row 2: bottom

In [13]:
# Pack the 2D box limits in (u_min, u_max, v_min, v_max) order.
input_2Dbbox = np.array([u_min, u_max, v_min, v_max])
# FIXME: this cell previously ended with
#     row_mask = np.logical_and(img_point[])
# which is a SyntaxError (empty subscript) and kept the whole cell, including
# the assignment above, from running. np.logical_and also needs two operands,
# and `img_point` is not defined in any cell shown here. The statement is left
# out until the intended mask is written; `row_mask` is therefore not defined.

array([[387, 181],
       [423, 181],
       [423, 203],
       [387, 203]], dtype=int32)