In [94]:
import math
import numpy as np
import xml.etree.ElementTree as ET

## Get Camera Calibration Parameters

In [90]:
# Read the camera-to-camera calibration file into a list with one entry per
# line of text (each entry keeps its trailing newline).
with open("2011_09_26/calib_cam_to_cam.txt") as f:
    calib = list(f)

In [91]:
# Take line 11 of the calibration file (presumably the `K_01:` entry — confirm
# against the file), drop its first space-separated token, and shape the
# remaining values into a 3x3 matrix.
K_01 = np.array(list(map(float, calib[11].strip().split(' ')[1:]))).reshape(3, 3)

In [92]:
# Pull the diagonal entries (fx, fy) and the last-column entries (cx, cy) of
# the first two rows of K_01.
fx, fy, cx, cy = K_01[0, 0], K_01[1, 1], K_01[0, 2], K_01[1, 2]

## Calculate Alpha

In [73]:
def calculate_alpha(tx, tz, f_x, c_x):
    """
    Angle between the optical axis and the image column an object projects to.

    The object is projected to the pixel column ``u = f_x * (tx / tz) + c_x``;
    the result is ``atan2(u - c_x, f_x)``. Because ``c_x`` is added and then
    subtracted again, it only influences the result through rounding.

    :param tx: Lateral position of the object relative to the camera
    :param tz: Forward distance from the camera to the object
    :param f_x: Camera's focal length in the x-direction
    :param c_x: x-coordinate of the camera's optical center
    :return: Angle in radians
    :raises ValueError: If ``tz`` equals zero
    """
    if tz == 0:
        raise ValueError("tz cannot be zero to avoid division by zero.")

    # Pixel column of the projected point, then its signed distance from the
    # optical center expressed as an angle.
    pixel_u = c_x + f_x * (tx / tz)
    return math.atan2(pixel_u - c_x, f_x)

## Calculate Bounding Box Parameters

In [118]:
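# TODO: Bounding boxes are wrong — the recorded output below shows box edges in the thousands of pixels, some negative (e.g. left ≈ -2950, right ≈ 31170).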

In [None]:
def project_to_image(points_3D, camera_matrix=None):
    """
    Project 3D points to 2D image coordinates with a pinhole camera matrix.

    Args:
    points_3D: array-like shape (3, N)
        Points expressed in the camera coordinate frame, one per column.
    camera_matrix: array-like shape (3, 3) or (3, 4), optional
        A 3x3 intrinsic matrix is applied to the points directly; a 3x4
        projection matrix is applied to the points in homogeneous form.
        When omitted, the notebook-level ``K_01`` is used.

    Returns:
    np.array shape (2, N)
        The projected coordinates, row 0 horizontal and row 1 vertical.

    Raises:
    ValueError
        If ``points_3D`` is not (3, N) or ``camera_matrix`` has an
        unsupported shape.
    """
    # Resolve the default at call time so a later re-assignment of K_01 is honoured.
    if camera_matrix is None:
        camera_matrix = K_01

    points_3D = np.asarray(points_3D, dtype=float)
    camera_matrix = np.asarray(camera_matrix, dtype=float)

    if points_3D.ndim != 2 or points_3D.shape[0] != 3:
        raise ValueError(f"points_3D must have shape (3, N), got {points_3D.shape}")

    if camera_matrix.shape == (3, 3):
        # A 3x3 intrinsic matrix multiplies the 3xN points as they are; adding
        # a row of ones here would make the shapes incompatible.
        points = points_3D
    elif camera_matrix.shape == (3, 4):
        # A 3x4 projection matrix needs homogeneous 4xN input.
        points = np.vstack((points_3D, np.ones((1, points_3D.shape[1]))))
    else:
        raise ValueError(
            f"camera_matrix must have shape (3, 3) or (3, 4), got {camera_matrix.shape}"
        )

    points_2D_homogeneous = camera_matrix @ points
    # Perspective divide by the third row.
    return points_2D_homogeneous[:2, :] / points_2D_homogeneous[2, :]

In [None]:
def calculate_bounding_box(corners_3D_world, pose, rotation_y, velo_to_cam, camera_matrix):
    """
    Calculate the 2D bounding box for a 3D object given its world coordinates,
    pose, rotation, and camera calibration parameters.

    The corners are rotated, translated by ``pose``, moved into the camera
    frame with ``velo_to_cam``, projected with ``camera_matrix``, and the
    axis-aligned extent of the projected points is returned.

    Args:
    corners_3D_world: np.array shape (3, 8)
        The 3D bounding box corners of the object in world coordinates.
    pose: np.array shape (3,)
        The translation vector (x, y, z) of the object.
    rotation_y: float
        The rotation around the vertical axis in radians.
    velo_to_cam: np.array shape (3, 4)
        The transformation matrix from world coordinates to camera coordinates.
        Only the top-left 3x3 block and the first three rows of column 3 are read.
    camera_matrix: np.array shape (3, 3)
        The camera intrinsic matrix.

    Returns:
    tuple
        Bounding box coordinates (left, top, right, bottom).
    """
    corners_3D_world = np.asarray(corners_3D_world, dtype=float)
    velo_to_cam = np.asarray(velo_to_cam, dtype=float)
    camera_matrix = np.asarray(camera_matrix, dtype=float)

    # Create a rotation matrix for rotation around the y-axis
    # NOTE(review): this rotation is applied before velo_to_cam. If the corners
    # are given in a frame whose vertical axis is z (as the velo_to_cam name
    # suggests), the rotation should be about z instead — confirm.
    c, s = np.cos(rotation_y), np.sin(rotation_y)
    R = np.array([
        [c, 0, s],
        [0, 1, 0],
        [-s, 0, c]
    ])

    # Rotate and translate the corners in the world coordinate system
    corners_3D_cam = R @ corners_3D_world + np.reshape(pose, (3, 1))

    # Transform corners to camera coordinates
    corners_3D_cam = velo_to_cam[:3, :3] @ corners_3D_cam + velo_to_cam[:3, 3].reshape(3, 1)

    # Project the corners to the image plane with the intrinsics passed in by
    # the caller, then apply the perspective divide by the third row.
    # NOTE(review): corners with non-positive depth are not filtered out here.
    projected = camera_matrix @ corners_3D_cam
    corners_2D = projected[:2, :] / projected[2, :]

    # Calculate the bounding box in image coordinates
    left = np.min(corners_2D[0, :])
    right = np.max(corners_2D[0, :])
    top = np.min(corners_2D[1, :])
    bottom = np.max(corners_2D[1, :])

    return (left, top, right, bottom)

## Parse XML

In [117]:
# Load and parse the XML file
tree = ET.parse("2011_09_26/tracklet_labels.xml")
root = tree.getroot()

# List to hold all tracklets
objects = []

# Label lines grouped by absolute frame number. Every label file is written
# exactly once after the loop, so re-running this cell does not append
# duplicate lines to files left over from an earlier run.
labels_by_frame = {}

# Find tracklet root
for track_root in root.find('tracklets'):
    if track_root.tag != "item":
        continue

    # Each tracklet item is expected to have exactly seven children; the
    # unpacking fails loudly if the file deviates from that.
    objType, h, w, l, frame, poses, _ = list(track_root)

    objType, h, w, l = objType.text, h.text, w.text, l.text
    frame = frame.text

    # Report the tracklet only after its fields have been read, so the values
    # shown belong to this tracklet.
    print(f"objType: {objType}")
    print(f"frame: {frame}")

    count = int(poses[0].text)

    print(f"count: {count}")
    print("---")

    # `frame` is taken to be the frame of the tracklet's first pose (the
    # `first_frame` element in KITTI tracklet files — confirm), so pose i
    # belongs to absolute frame first_frame + i.
    first_frame = int(frame)

    # The first child of `poses` is the count read above; the second is also
    # skipped, and the remaining children are the per-frame poses.
    for i, p in enumerate(poses[2:]):
        print(f"index {i}:")

        # Each pose is expected to have fifteen children; the seventh is not used.
        (tx, ty, tz, rx, ry, rz, _, occlusion, occlusion_kf, trunc,
         amt_occlusion, amt_occlusion_kf, amt_border_l, amt_border_r,
         amt_border_kf) = [field.text for field in p]

        alpha = calculate_alpha(float(tx), float(tz), float(fx), float(cx))

        # NOTE(review): this call passes eight positional values, but
        # calculate_bounding_box as defined in this notebook takes
        # (corners_3D_world, pose, rotation_y, velo_to_cam, camera_matrix).
        # It is left as written because the corner array and the
        # velo-to-camera transform it needs are not built anywhere in this
        # notebook — reconcile before relying on the boxes. Also confirm
        # whether ry or rz is the intended yaw: rz is what the label line records.
        bbox = calculate_bounding_box(float(tx), float(ty), float(tz), float(w), float(h), float(l), float(ry), K_01)

        bbox_left, bbox_top, bbox_right, bbox_bottom = bbox

        # Every field is separated by a single space, including across the
        # three segments the line is assembled from.
        o = f"{objType} {trunc} {occlusion} {alpha} "
        o += f"{bbox_left} {bbox_top} {bbox_right} {bbox_bottom} {h} {w} {l} "
        o += f"{tx} {ty} {tz} {rz}"

        labels_by_frame.setdefault(first_frame + i, []).append(o)

        print(o)
        print()
        print()

    print("---")
    print()
    print()

# Write one label file per frame, one object per line.
for frame_idx, frame_lines in labels_by_frame.items():
    with open(f"2011_09_26/tracklet_labels/{str(frame_idx).zfill(6)}.txt", "w") as fp:
        fp.write("\n".join(frame_lines) + "\n")

objType: Van
frame: 0
count: 154
---
index 0:
Van 0 0 -1.4443052442338848-2950.0586629613385 -1144.486383770594 31169.967673422925 11842.266973735408 2 1.8232554 4.433885613.701017509034987 4.571364845086606 -1.7423590421676636 0.54469172949168099


index 1:
Van 0 0 -1.452552963475814-3082.8427554351256 -1203.475275511254 26402.5738719446 10085.072778287808 2 1.8232554 4.433885613.870616065076742 4.669081516534427 -1.6477950072567558 0.55076881578110437


index 2:
Van 0 0 -1.4606170463445722-3222.287866904061 -1265.4233028009621 22993.67324504312 8828.600029468213 2 1.8232554 4.433885614.040214621118496 4.7667981879822472 -1.5532309723458482 0.55684590207052787


index 3:
Van 0 0 -1.4685025431014136-3368.9080968538537 -1330.5588526275078 20435.015886378136 7885.514971676006 2 1.8232554 4.433885614.209813161996959 4.8645148506935172 -1.4586669458896238 0.56292298781661743


index 4:
Van 0 0 -1.4686548181690533-3398.688055072302 -1346.9682806617523 21093.228198915756 8164.617157277945 2 

Van 1 0 -1.4209614430404125-2082.5318785306054 -10812.460025969965 13767.320296754731 2602.287024406684 2.1953125 1.8952746 5.530314411.880669673172701 -9.7783172430565504 -1.7935811515426501 2.3385765166278087


index 78:
Van 1 0 -1.4199825686645249-2053.9401571278318 -10336.793603027127 13409.06901062327 2540.685474184326 2.1953125 1.8952746 5.530314411.709732329092029 -9.4613203443067189 -1.7795006973647531 2.3634730820602581


index 79:
Van 1 0 -1.4189749963010243-2025.1707212121964 -9874.52597098459 13060.909111735298 2478.7010337772504 2.1953125 1.8952746 5.530314411.538795010483005 -9.1443234927931041 -1.7654202452850072 2.3883696437828301


index 80:
Van 1 0 -1.4179374471343509-1996.2219043894638 -9425.098791665332 12722.420084030886 2416.330112899069 2.1953125 1.8952746 5.530314411.367857691873981 -8.8273266412794875 -1.7513397932052612 2.4132662055054022


index 81:
Van 1 0 -1.4151732774741608-1949.7803720622003 -9124.481449265084 12489.120026603157 2353.5624217612512 2.19531