In [1]:
%load_ext autoreload
%autoreload 2

import os
from pathlib import Path

from overcooked_ai.dataset_types import DetectionDataset

# Root everything under the user's home directory; "/home/mimic" is only the
# fallback used when the HOME environment variable is not set.
HOME_DIR = Path(os.environ.get("HOME", "/home/mimic"))
# Folder that holds the dataset JSON loaded below.
SOURCE_DIR: Path = HOME_DIR / "Overcooked2_1-1_jpeg/"
ds_gt_path = SOURCE_DIR / "detection_dataset.mar2025.json"
# Dataset whose `entries` the later cells iterate over
# (presumably ground truth, going by the "gt" suffix -- confirm).
ds_gt = DetectionDataset.load_from_json(ds_gt_path)

In [27]:
# Estimate camera intrinsics and distortion from 2D-3D correspondences using OpenCV

import numpy as np
import cv2

from overcooked_ai.game_maps import world_1_1_tile_object_ids
from overcooked_ai.grid_homography import apply_homography, match_euclidean_dist_linear_sum_assignment_xycats
from overcooked_ai.dataset_ops import convert_from_annotations_to_frame_coord_xycats, filter_tile_annotations
from overcooked_ai.type_conversions import convert_from_world_tiles_to_xycats

# Reference tile layout of world 1-1. Columns 0 and 1 are read as x and y
# below; the remaining columns are presumably category ids -- confirm.
world_1_1_grid_xycats = convert_from_world_tiles_to_xycats(world_1_1_tile_object_ids)

# Use every ENTRY_STRIDE-th dataset entry as one calibration view.
ENTRY_STRIDE = 20
# A view with fewer than 4 correspondences cannot constrain a planar pose,
# and cv2.calibrateCamera is expected to reject it, so such views are skipped.
MIN_CORRESPONDENCES_PER_VIEW = 4

# One float32 array per retained view: (N, 2) frame-space points and the
# matching (N, 3) grid-space points lying on the z = 0 plane.
per_entry_frame_space_xys: list[np.ndarray] = []
per_entry_grid_space_xyzs: list[np.ndarray] = []

for entry in ds_gt.entries[::ENTRY_STRIDE]:
    # Stored homography and its inverse; the inverse is what takes frame
    # coordinates into grid coordinates below.
    H_grid_frame = np.array(entry.H_grid_img_vector).reshape(3, 3)
    H_frame_grid = np.linalg.inv(H_grid_frame)

    # Project the annotated tile positions from the frame into grid space,
    # carrying the non-coordinate columns along unchanged.
    tile_frame_xycats = convert_from_annotations_to_frame_coord_xycats(filter_tile_annotations(entry.annotations))
    tile_frame_hxys = np.hstack((tile_frame_xycats[:, :2], np.ones((tile_frame_xycats.shape[0], 1))))
    tile_grid_hxys = apply_homography(H_frame_grid, tile_frame_hxys)
    tile_grid_xycats = np.hstack((tile_grid_hxys[:, :2], tile_frame_xycats[:, 2:]))

    # Pair each projected tile with a reference tile; each pair yields one
    # 2D (frame) <-> 3D (grid, z = 0) correspondence.
    matched_tile_to_gt_idxs = list(
        match_euclidean_dist_linear_sum_assignment_xycats(tile_grid_xycats, world_1_1_grid_xycats)
    )
    if len(matched_tile_to_gt_idxs) < MIN_CORRESPONDENCES_PER_VIEW:
        continue

    frame_space_xys = [
        (tile_frame_xycats[tile_idx, 0], tile_frame_xycats[tile_idx, 1])
        for tile_idx, _ in matched_tile_to_gt_idxs
    ]
    grid_space_xyzs = [
        (world_1_1_grid_xycats[gt_idx, 0], world_1_1_grid_xycats[gt_idx, 1], 0.0)
        for _, gt_idx in matched_tile_to_gt_idxs
    ]
    per_entry_frame_space_xys.append(np.array(frame_space_xys, dtype=np.float32))
    per_entry_grid_space_xyzs.append(np.array(grid_space_xyzs, dtype=np.float32))

In [41]:
# Fit intrinsics and lens distortion to the per-view 2D-3D correspondences.
# Both guesses are None, so OpenCV derives its own starting point.
intrinsics_matrix_guess = None
distortion_coefficients_guess = None

# Read the frame size from the dataset itself instead of relying on whichever
# `entry` an earlier loop happened to leave bound in the kernel.
# Assumes every entry shares the same frame size -- TODO confirm.
reference_entry = ds_gt.entries[0]
image_size = (reference_entry.width, reference_entry.height)

rms_reprojection_error, K, distortion_coefficients, rotation_vectors, translation_vectors = cv2.calibrateCamera(
    per_entry_grid_space_xyzs,
    per_entry_frame_space_xys,
    image_size,
    intrinsics_matrix_guess,
    distortion_coefficients_guess,
)
# NOTE(review): the reprojection error is in pixels; a value in the hundreds
# means the fit should not be trusted. If all views share (nearly) the same
# camera pose, planar calibration is ill-posed -- worth checking.
print("RMS reprojection error:\n", rms_reprojection_error)
print("Intrinsics matrix:\n", K)
print("Distortion coefficients:\n", distortion_coefficients)

RMS reprojection error:
 124.72920472826033
Intrinsics matrix:
 [[1.35160493e+03 0.00000000e+00 9.30096963e+02]
 [0.00000000e+00 3.63483243e+03 7.63085229e+01]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00]]
Distortion coefficients:
 [[-1.30185316  1.04378971  0.09903759  0.00381889 -0.30134412]]


In [39]:
# Show the intrinsics matrix currently bound to `K` as this cell's output.
K

array([[1.35160493e+03, 0.00000000e+00, 9.30096963e+02],
       [0.00000000e+00, 3.63483243e+03, 7.63085229e+01],
       [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])

In [87]:
# Estimate camera intrinsics using Zhang's method

import scipy.linalg

def extract_v_ij(H: np.ndarray, i: int, j: int) -> np.ndarray:
    """Build the 6-vector v_ij of Zhang's method from columns i and j of H.

    With b = [B11, B12, B22, B13, B23, B33], the returned vector satisfies
    h_i^T B h_j = v_ij . b, where h_i and h_j are the i-th and j-th columns
    of the 3x3 homography H and B is symmetric.

    Args:
        H: 3x3 homography matrix.
        i: Index of the first column.
        j: Index of the second column.

    Returns:
        Array of shape (6,).
    """
    hi_x, hi_y, hi_w = H[:3, i]
    hj_x, hj_y, hj_w = H[:3, j]
    return np.array([
        hi_x * hj_x,
        hi_x * hj_y + hi_y * hj_x,
        hi_y * hj_y,
        hi_w * hj_x + hi_x * hj_w,
        hi_w * hj_y + hi_y * hj_w,
        hi_w * hj_w,
    ])

# Each view contributes two linear constraints on b = [B11, B12, B22, B13, B23, B33]:
#   h1^T B h2 = 0   and   h1^T B h1 = h2^T B h2,
# where h1, h2 are the first two columns of that view's grid -> frame homography.
# A unique solution needs several views with genuinely different plane poses.
B_entries = []
for entry in ds_gt.entries:
    # Zhang's method needs the homography from the model plane (grid) to the
    # image (frame). That is the stored matrix itself, not its inverse, so it
    # keeps the name H_grid_frame here. dtype=float keeps the normalization
    # below a true division even if the stored values are integers.
    H_grid_frame = np.array(entry.H_grid_img_vector, dtype=float).reshape(3, 3)
    H_grid_frame = H_grid_frame / H_grid_frame[2, 2]

    B_entries.append(extract_v_ij(H_grid_frame, 0, 1))
    B_entries.append(extract_v_ij(H_grid_frame, 0, 0) - extract_v_ij(H_grid_frame, 1, 1))

# Solve B_mat @ b = 0 in the least-squares sense: b is the right singular
# vector of the smallest singular value, i.e. the last row of the third
# value returned by scipy.linalg.svd (which is V^H, kept under the name V).
B_mat = np.array(B_entries)
_, _, V = scipy.linalg.svd(B_mat)
b_vec = V[-1, :]
# Unpack b into the symmetric 3x3 matrix B.
B = np.array(
    [[b_vec[0], b_vec[1], b_vec[3]],
     [b_vec[1], b_vec[2], b_vec[4]],
     [b_vec[3], b_vec[4], b_vec[5]]])

def nearest_positive_definite(A, min_eigenvalue=1e-8):
    """Project A onto the symmetric positive-definite matrices.

    A is first symmetrized as (A + A.T) / 2. The eigenvalues of that
    symmetric part are then raised to at least ``min_eigenvalue`` and the
    matrix is rebuilt from the unchanged eigenvectors.

    Args:
        A: Square 2-D array.
        min_eigenvalue: Smallest eigenvalue allowed in the result. The floor
            is absolute, so choose it relative to the scale of A; the default
            of 1e-8 matches the previously hard-coded value.

    Returns:
        Symmetric array of the same shape as A whose eigenvalues are all
        at least ``min_eigenvalue``.

    Raises:
        ValueError: If ``min_eigenvalue`` is not strictly positive, since the
            result could then fail to be positive definite.
    """
    if min_eigenvalue <= 0:
        raise ValueError("min_eigenvalue must be strictly positive")
    symmetric_part = (A + A.T) / 2
    eigvals, eigvecs = np.linalg.eigh(symmetric_part)
    eigvals_clipped = np.clip(eigvals, min_eigenvalue, None)  # force positive
    return eigvecs @ np.diag(eigvals_clipped) @ eigvecs.T

def compute_K_from_B(B):
    """Recover the intrinsics matrix K from B using Zhang's closed form.

    B is taken to be symmetric and equal, up to a scale factor, to
    K^-T K^-1; only its upper triangle is read. The scale factor is solved
    for along the way, so B does not need to be normalized beforehand.

    Args:
        B: 3x3 array.

    Returns:
        3x3 array [[fx, skew, u0], [0, fy, v0], [0, 0, 1]]. The square roots
        may come out as nan when B is not positive definite.
    """
    b11, b12, b13 = B[0, :3]
    b22, b23 = B[1, 1:3]
    b33 = B[2, 2]

    # Two sub-expressions that recur in the formulas below.
    cross_term = b12 * b13 - b11 * b23
    minor_12 = b11 * b22 - b12**2

    principal_v = cross_term / minor_12
    scale = b33 - (b13**2 + principal_v * cross_term) / b11
    focal_x = np.sqrt(scale / b11)
    focal_y = np.sqrt(scale * b11 / minor_12)
    skew = -b12 * focal_x**2 * focal_y / scale
    principal_u = skew * principal_v / focal_y - b13 * focal_x**2 / scale

    return np.array([
        [focal_x, skew, principal_u],
        [0, focal_y, principal_v],
        [0, 0, 1],
    ])

# B comes from an SVD null vector, so it is determined only up to a nonzero
# scale -- including its sign. A valid B is positive definite (positive
# trace). If the SVD returned the negated solution, clipping its eigenvalues
# would flatten B to almost a multiple of the identity and produce a
# meaningless K, so flip the sign first.
if np.trace(B) < 0:
    B = -B

# Remove any remaining noise-induced indefiniteness, then read off K.
B = nearest_positive_definite(B)

K = compute_K_from_B(B)
print(K)


[[ 3.00494939e+03  1.65683449e+02 -5.23914980e+01]
 [ 0.00000000e+00  9.07505226e+02  2.86990419e+03]
 [ 0.00000000e+00  0.00000000e+00  1.00000000e+00]]
