# Homework 1 & 2

Elia Mirafiori VR537643

## Part 0 – Data Acquisition

### Acquisition of calibration images

To acquire the images, I used my CAMERA and, as the calibration pattern, a chessboard of 8x8 squares with 7x7 internal corners (each square cell measures 24 mm per side). Ten images were collected to calibrate the camera, taken with different poses and at varying distances. All the images were saved in JPEG format.

### Choice and Acquisition of the Object

TODO

## Part 1 – Camera Calibration

In [None]:
import numpy as np
import cv2 as cv
import glob
import os


# Chessboard geometry, counted in squares per side (not inner corners)
rows: int = 8
columns: int = 8
# Side length of a single square, in meters (24 mm)
square_size: float = 24e-3
# Folder holding the calibration photos; the trailing slash matters because
# the glob pattern is built from this string by plain concatenation
calibration_assets_path: str = "assets/calibration/phone/screen/"

# Stop rule for the iterative sub-pixel corner refinement.
# Layout of the tuple: (type flags, iteration cap, epsilon).
# With both flags set, refinement ends as soon as EITHER limit is hit.
criteria = (
    cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_MAX_ITER,
    30,  # iteration cap
    0.001,  # epsilon (requested accuracy)
)
# Board layout expressed as INNER corner counts: one less than the number
# of squares along each direction, ordered (columns, rows)
inner_corners = (columns - 1, rows - 1)
# One 3D point per inner corner, in board coordinates.
# The Z column stays at zero because the chessboard is planar (Z = 0).
objp = np.zeros((inner_corners[0] * inner_corners[1], 3), dtype=np.float32)
# Integer corner indices: X runs fastest, Y advances once per full row of
# corners. Multiplying by the square size converts indices to meters.
grid_x, grid_y = np.meshgrid(
    np.arange(inner_corners[0]), np.arange(inner_corners[1])
)
objp[:, 0] = grid_x.ravel() * square_size
objp[:, 1] = grid_y.ravel() * square_size
# Accumulators filled once per successfully detected board
objpoints = []  # 3D points in real-world space (meter)
imgpoints = []  # 2D points in image plane (pixels)
# Collect all calibration images from disk.
# Each image should show the same chessboard pattern.
# glob returns paths in arbitrary order, so the list is sorted to make the
# processing order (and with it the order of the per-image results that are
# appended to objpoints / imgpoints) reproducible between runs.
images = sorted(glob.glob(f"{calibration_assets_path}*.jpg"))
# Loop over each calibration image
for img_path in images:
    print(f"Path: {img_path}")
    # Read image from disk (OpenCV loads images in BGR format).
    # cv.imread does not raise on failure: it returns None when the file
    # cannot be read or decoded, so check before handing it to OpenCV.
    img_bgr = cv.imread(img_path)
    if img_bgr is None:
        print(f"Could not read image, skipping: {img_path}")
        continue
    # Convert image to grayscale
    # Chessboard detection works on single-channel images
    img_gray = cv.cvtColor(img_bgr, cv.COLOR_BGR2GRAY)
    # Detect chessboard inner corners
    #
    # corners_found:
    #   - True if all expected corners are detected
    # corners:
    #   - Detected corner locations (pixel coordinates)
    #
    # inner_corners = (columns - 1, rows - 1)
    # Must match the object points definition exactly
    corners_found, corners = cv.findChessboardCorners(img_gray, inner_corners, None)
    print(f"Corners found: {corners_found}")
    # Images without a complete detection contribute nothing to calibration
    if not corners_found:
        continue

    # Store the known 3D object points (real-world coordinates)
    # Same for every image, since the chessboard geometry is fixed
    objpoints.append(objp)
    # Refine corner positions to sub-pixel accuracy
    #
    # (11, 11)  -> search window size
    # (-1, -1)  -> use default dead zone
    # criteria  -> termination criteria defined earlier
    corners_refined = cv.cornerSubPix(
        img_gray, corners, (11, 11), (-1, -1), criteria
    )
    # Store the refined 2D image points (pixel coordinates)
    imgpoints.append(corners_refined)

    # Visual feedback: draw detected corners on the image
    cv.drawChessboardCorners(img_bgr, inner_corners, corners_refined, corners_found)
    # Display a quarter-size preview so large photos fit on screen
    cv.imshow(
        "Calibration Image",
        cv.resize(
            img_bgr,
            (img_bgr.shape[1] // 4, img_bgr.shape[0] // 4),
        ),
    )
    cv.waitKey(500)  # display for 500 ms

cv.destroyAllWindows()

# Calibration needs at least one image with a detected chessboard.
# Without this check the failure would surface as an opaque IndexError
# (no images at all) or an OpenCV assertion (no detections).
if not objpoints:
    raise RuntimeError(
        "No chessboard was detected in any image matching "
        f"'{calibration_assets_path}*.jpg'; cannot calibrate the camera."
    )

# Use the size of the first image of the dataset
# (all calibration images are assumed to share the same resolution)
reference_image = cv.imread(images[0])
if reference_image is None:
    # cv.imread signals failure by returning None instead of raising
    raise RuntimeError(f"Could not read image: {images[0]}")
image_shape = reference_image.shape[:2][::-1]  # width, height
# Camera calibration
#
# Inputs:
#  - objpoints : list of 3D real-world points (meter)
#  - imgpoints : list of corresponding 2D image points (pixels)
#  - image size: (width, height)
#
# Outputs:
#  - rms_error  : RMS re-projection error
#  - K          : camera intrinsic matrix (3x3)
#  - dist_coeffs: distortion coefficients (k1, k2, p1, p2, k3 with the
#                 default flags used here)
#  - rot_vecs   : rotation vectors (3x1) (one per image)
#  - trans_vecs : translation vectors (3x1) (one per image)
#
# OpenCV uses Rodrigues vectors to represent rotation
# - 3 numbers → axis-angle representation
# - Converts to a 3×3 rotation matrix using:
#   - R, _ = cv.Rodrigues(rot_vecs[i])
rms_error, K, dist_coeffs, rot_vecs, trans_vecs = cv.calibrateCamera(
    objpoints, imgpoints, image_shape, None, None
)

# Re-projection check: for every calibration image, project the board's 3D
# points with the estimated pose and intrinsics, then measure how far the
# result lands from the detected corners (L2 norm divided by point count).
# The per-image values are accumulated here and averaged when printed.
mean_error = 0
for board_points, detected, rvec, tvec in zip(
    objpoints, imgpoints, rot_vecs, trans_vecs
):
    projected, _ = cv.projectPoints(board_points, rvec, tvec, K, dist_coeffs)
    mean_error += cv.norm(detected, projected, cv.NORM_L2) / len(projected)
print(f"Camera Matrix K: {K}")
print(f"Re-projection Error: {rms_error}")
# Threshold used in this homework: the error is considered good below 0.08
print(f"Mean Re-projection Error (in pixels): {mean_error / len(objpoints)}")

# Save calibration parameters alongside the calibration images.
# The path is built from calibration_assets_path, which is relative, so it
# resolves against the current working directory. __file__ is deliberately
# not used: it is undefined when this code runs as a notebook cell and
# would raise NameError.
param_path = os.path.join(calibration_assets_path, "calibration.npz")
# Save several arrays into a single file in uncompressed .npz format;
# each keyword becomes the key under which the array can be loaded back
np.savez(
    param_path,
    rms_error=rms_error,
    K=K,
    dist_coeffs=dist_coeffs,
    rot_vecs=rot_vecs,
    trans_vecs=trans_vecs,
)