Imports and Constants:

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import cv2
import numpy as np
import pandas as pd
import os
from pathlib import Path
import os
from os.path import basename



# Input folders (relative paths). Each drive is addressed below by a zero-padded
# four-digit name: a sub-folder of image_dir, and a "<name>.txt" file in
# label_dir and calib_dir.
image_dir = 'kitti/data_tracking_image_2/training/image_02/'
label_dir = 'kitti/data_tracking_label_2/training/label_02/'
calib_dir = 'kitti/data_tracking_calib/training/calib/'
# Output folder: frames with the boxes drawn on them are written here, one sub-folder per drive.
drawn_images_dir = 'kitti/drawn_image/'
number_of_drives = len(os.listdir(calib_dir)) # number of entries in calib_dir; assumes exactly one calibration file per drive


This function reads a label file into a pandas DataFrame:

In [3]:
def read_label_3d(label_file):
    """Load a space-separated KITTI label file into a pandas DataFrame.

    :param label_file: path (or file-like object) of the label file, e.g.
        ``<label_dir>/0000.txt``
    Returns Pandas DataFrame with one row per labelled object

    Column layout
    =============
    The file has no header row; its values are assigned, in order, to the
    18 column names below. A row that carries fewer values than that gets
    NaN in the trailing columns.

    #Values    Columns                  Description
    ----------------------------------------------------------------------------
       1    frame_number             Presumably the frame the object belongs to
       1    tracking_id              Presumably the id of the object's track
       1    label                    Type of object: 'Car', 'Van', 'Truck',
                                     'Pedestrian', 'Person_sitting', 'Cyclist',
                                     'Tram', 'Misc' or 'DontCare'
       1    truncated                How far the object leaves the image
                                     boundaries (0 = non-truncated)
       1    occluded                 Occlusion state: 0 = fully visible,
                                     1 = partly occluded, 2 = largely occluded,
                                     3 = unknown
       1    alpha                    Observation angle of object, [-pi..pi]
       4    bbox_xmin, bbox_ymin,    2D bounding box in the image (0-based):
            bbox_xmax, bbox_ymax     left, top, right, bottom pixel coordinates
       3    dim_height, dim_width,   3D object dimensions (in meters)
            dim_length
       3    loc_x, loc_y, loc_z      3D object location in camera coordinates
                                     (in meters)
       1    rotation_y               Rotation around the Y-axis in camera
                                     coordinates, [-pi..pi]
       1    score                    Only for results: detection confidence,
                                     higher is better
    """
    column_names = [
        'frame_number', 'tracking_id',
        'label', 'truncated', 'occluded', 'alpha',
        'bbox_xmin', 'bbox_ymin', 'bbox_xmax', 'bbox_ymax',
        'dim_height', 'dim_width', 'dim_length',
        'loc_x', 'loc_y', 'loc_z',
        'rotation_y', 'score',
    ]
    labels = pd.read_csv(label_file, sep=" ", names=column_names)
    return labels

This function takes the dimensions, location, and yaw of a bounding box and returns its eight corners in cam2 coordinates:

In [4]:
# 3D box under cam2 coordinates
def compute_3D_box_cam2(h,w,l,x,y,z,yaw):
    """Compute the eight corners of a 3D box in cam2 coordinates.

    The box is built around the origin (length along x, height along
    negative y, width along z), rotated by ``yaw`` about the y-axis and then
    shifted by ``(x, y, z)``.

    Return: 3x8 array; rows are x, y, z and each column is one corner
    """
    cos_yaw = np.cos(yaw)
    sin_yaw = np.sin(yaw)
    # Rotation about the y-axis
    rotation = np.array([
        [cos_yaw, 0, sin_yaw],
        [0, 1, 0],
        [-sin_yaw, 0, cos_yaw],
    ])
    half_l = l / 2
    half_w = w / 2
    # Columns 0-3 are the corners at y = 0, columns 4-7 the corners at y = -h
    local_corners = np.array([
        [half_l, half_l, -half_l, -half_l, half_l, half_l, -half_l, -half_l],
        [0, 0, 0, 0, -h, -h, -h, -h],
        [half_w, -half_w, -half_w, half_w, half_w, -half_w, -half_w, half_w],
    ])
    # Column vector so the shift is applied to every corner
    offset = np.array([[x], [y], [z]])
    return rotation @ local_corners + offset

This function reads the calibration file at the specified path and returns the 3x4 matrix stored on its third line:

In [5]:
def calibration_file_p2(path):
    """Read the 3x4 calibration matrix stored on the third line of a file.

    The third line (index 2) is split on whitespace. Its first token is
    treated as the row label and skipped; the remaining 12 values are
    converted to floats and arranged row by row into a 3x4 matrix.

    Keyword arguments:
    path -- path of the calibration text file

    Returns a (3, 4) float array.

    Raises IndexError if the file has fewer than three lines, and ValueError
    if the third line does not hold exactly 12 numeric values.
    """
    # The context manager closes the file even if reading fails.
    with open(path, "r") as calib_file:
        lines = calib_file.readlines()
    tokens = lines[2].split()
    entries = [float(token) for token in tokens[1:]]
    if len(entries) != 12:
        # A short row would otherwise produce a partly zero matrix without any warning.
        raise ValueError(
            "expected 12 calibration values on line 3 of " + str(path)
            + ", found " + str(len(entries))
        )
    return np.array(entries).reshape(3, 4)

Given an image and a bounding box, this function draws the box on the image:

In [6]:
def draw_box(img, box, p2, index, driveNumber, frame_number):
    """Draw the projected 3D bounding box of one object onto the image.

    The eight box corners are computed in cam2 coordinates, projected with
    ``p2`` and connected by twelve line segments drawn directly on ``img``
    (the image is modified in place; nothing is returned).

    Keyword arguments:
    img          -- Input image (array), drawn on in place
    box          -- bbox info: one row of the DataFrame created by **read_label_3d** (pandas Series)
    p2           -- Camera calibration matrix (3x4 array)
    index        -- object index (currently unused)
    driveNumber  -- Folder number that contains frames (int, currently unused)
    frame_number -- Frame number (int, currently unused)
    """
    corners_3d_cam2 = compute_3D_box_cam2(
        box.dim_height, box.dim_width, box.dim_length,
        box.loc_x, box.loc_y, box.loc_z,
        box.rotation_y,
    )
    # Append a row of ones (homogeneous coordinates) so the 3x4 matrix can be applied.
    corners_homogeneous = np.vstack((corners_3d_cam2, np.ones((1, 8))))
    points = p2.dot(corners_homogeneous)
    depths = points[2]
    if np.any(depths <= 0):
        # The perspective divide is undefined (zero) or mirrored (negative)
        # for such corners, so the box is skipped rather than drawn wrongly.
        return
    pixels = (points[:2] / depths).astype(int)
    # Plain Python ints: some OpenCV builds reject NumPy integer scalars as point coordinates.
    vertices = [(int(u), int(v)) for u, v in pixels.T]
    edges = (
        (0, 1), (1, 2), (2, 3), (3, 0),  # face formed by corners 0-3
        (4, 5), (5, 6), (6, 7), (7, 4),  # face formed by corners 4-7
        (0, 4), (1, 5), (2, 6), (3, 7),  # segments joining the two faces
    )
    for start, end in edges:
        # (0, 0, 255) is red in OpenCV's BGR channel order; thickness 2.
        cv2.line(img, vertices[start], vertices[end], (0, 0, 255), 2)

This function iterates over the bounding boxes belonging to a single frame and draws them on that frame:

In [7]:
def draw_frame(dataFrame, driveNumber, frame_number, p2, show=False):
    """ Return frame matrix with 3d bounding boxes drawn on it
    Only rows of the requested frame whose label is neither "DontCare" nor
    "Pedestrian" and whose ``truncated`` value is 0 are drawn.

    Keyword arguments:
    dataFrame    -- label information as returned by **read_label_3d** (Pandas dataFrame)
    driveNumber  -- Folder number that contains frames (int)
    frame_number -- Frame number (int)
    p2           -- Camera calibration matrix (array)
    show         -- if true, display the image and print the box after each
                    box drawn on frame 70 (debug aid)

    Raises FileNotFoundError if the frame image cannot be read.
    """
    selected = (
        (dataFrame.frame_number == frame_number)
        & (dataFrame.label != "DontCare")
        & (dataFrame.label != "Pedestrian")
        & (dataFrame.truncated == 0)
    )
    frame_data = dataFrame.loc[selected]
    image_path = (
        image_dir + number_length_fixer(driveNumber) + "/"
        + number_length_fixer(frame_number, 6) + ".png"
    )
    I = cv2.imread(image_path)
    if I is None:
        # cv2.imread reports an unreadable file by returning None instead of raising.
        raise FileNotFoundError("Could not read image: " + image_path)
    for index, box in frame_data.iterrows():
        draw_box(I, box, p2, index, driveNumber, frame_number)
        # Print result in frame 70
        if frame_number == 70 and show:
            plt.figure(figsize=(30, 101.6))
            # OpenCV stores channels as BGR while matplotlib expects RGB;
            # convert a copy for display so the returned image stays BGR.
            plt.imshow(cv2.cvtColor(I, cv2.COLOR_BGR2RGB), interpolation='nearest')
            plt.show()
            print("frame_number: ", frame_number)
            print("box is:")
            print(box)
    return I

def number_length_fixer(num, length = 4):
    """Return ``num`` as text, left-padded with zeros to ``length`` characters.

    Values whose text is already ``length`` characters or longer are
    returned unpadded.
    """
    text = str(num)
    return text.rjust(length, "0")


This function iterates over the frames of the specified drive and saves each one with its boxes drawn:

In [8]:
def draw_drive(drive_number):
    """Save a .png with bounding boxes drawn for each frame of one drive
    The labels and calibration matrix of the drive are read once; every
    frame is then rendered with **draw_frame** and written under
    ``drawn_images_dir`` in a sub-folder named after the drive.

    Keyword arguments:
    drive_number -- Folder number that contains frames (int)

    Raises OSError if a rendered frame cannot be written.
    """
    drive_name = number_length_fixer(drive_number, 4)
    labels = read_label_3d(label_dir + drive_name + ".txt")
    p2 = calibration_file_p2(calib_dir + drive_name + ".txt")
    # Count only the .png frames so that stray entries in the folder
    # (hidden files, thumbnails, ...) do not inflate the frame count.
    # Frames are assumed to be numbered consecutively starting at 0.
    frame_files = [
        entry for entry in os.listdir(image_dir + drive_name)
        if entry.lower().endswith(".png")
    ]
    number_of_frames = len(frame_files)
    output_dir = drawn_images_dir + drive_name + "/"
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    for frame_number in range(number_of_frames):
        I = draw_frame(labels, drive_number, frame_number, p2, show=False)
        image_path = output_dir + number_length_fixer(frame_number, 6) + ".png"
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(image_path, I):
            raise OSError("Could not write image: " + image_path)



In [10]:
# Render every frame of drive 0 and save the results under drawn_images_dir.
draw_drive(0)

In [11]:
# Same call as in the previous cell: renders drive 0 again and rewrites the same output files.
draw_drive(0)

This function draws bounding boxes on every image of every drive:

In [None]:
def draw_all():
    """Render and save the boxed frames of every drive.

    Calls **draw_drive** once per drive index, from 0 up to (but not
    including) the module-level ``number_of_drives``.
    """
    for drive_index in range(number_of_drives):
        draw_drive(drive_index)

# Process all drives; each one is rendered by draw_drive.
draw_all()