<a href="https://colab.research.google.com/github/balakg/ipy-vision/blob/main/notebooks/cameras/pinhole_camera.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pinhole Camera

This demo covers the basics of the pinhole camera model. The transformation from a point in the world to a pixel in the image is determined by two components:

### 1. Camera Intrinsics ($K$)
$K$ represents the internal geometry of the camera. It handles the mapping from 3D camera coordinates to 2D pixel coordinates.
$$K = \begin{bmatrix} f_x & 0 & c_x \\ 0 & f_y & c_y \\ 0 & 0 & 1 \end{bmatrix}$$
* **Focal Lengths ($f_x,f_y$):** Control the magnification (zoom). These may be thought of as $f_x = f \cdot s_x$ and $f_y = f \cdot s_y$, where $f$ is the focal length (distance between the pinhole and the image plane), and $s_x, s_y$ are the number of pixels per unit length along each sensor axis (the inverse of a pixel's physical width and height), so that $f_x$ and $f_y$ are expressed in pixels.


* **Principal Point ($c_x, c_y$):** The location (in pixels) where the optical axis intersects the sensor, assuming ($0,0$) is at the top left of the image.

### 2. Camera Extrinsics ($[R|t]$)
This matrix defines the camera's pose in the world: its position ($t$) and its orientation ($R$).

---

## 🛠️ How to use the Visualizer
* **3D World View (Left):** Shows the camera's optical center (black dot), the virtual image plane (cyan), and the projection rays (gray).
* **2D Image Space (Right):** Shows the resulting digital image in pixels.
* **Reset Button:** If the object leaves the field of view, use the **Reset All to Defaults** button to return to the starting configuration.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interactive_output, FloatSlider, IntSlider, VBox, Accordion, Button, Dropdown
from scipy.spatial.transform import Rotation as R
from google.colab import output
output.no_vertical_scroll()

# Define the two objects of this demo
def get_object_data(obj_type="Letter F", center_x=0, center_y=0, center_z=7.0, size=1.5):
    """Build the wireframe geometry for one of the two demo objects.

    Args:
        obj_type: ``"Letter F"`` selects the planar letter; any other value
            selects the cube.
        center_x, center_y, center_z: World-space offset added to every
            vertex. The letter's lower-left corner sits at this point; the
            cube is centered on it.
        size: Height of the letter, or edge length of the cube.

    Returns:
        A ``(vertices, edges, colors)`` tuple: an ``(N, 3)`` array of world
        coordinates, a list of vertex-index pairs, and one color per edge.
    """
    offset = np.array([center_x, center_y, center_z])

    if obj_type != "Letter F":
        # Cube: corners 0-3 form the z = -half face, corners 4-7 the z = +half face.
        half = size / 2
        corner_signs = np.array([
            [-1, -1, -1], [1, -1, -1], [1, 1, -1], [-1, 1, -1],
            [-1, -1, 1], [1, -1, 1], [1, 1, 1], [-1, 1, 1],
        ])
        local_pts = half * corner_signs

        # Two square rings followed by the four edges that join them.
        near_ring = [(k, (k + 1) % 4) for k in range(4)]
        far_ring = [(k + 4, (k + 1) % 4 + 4) for k in range(4)]
        connectors = [(k, k + 4) for k in range(4)]
        wire = near_ring + far_ring + connectors

        palette = plt.cm.tab20(np.linspace(0, 1, len(wire)))
        return local_pts + offset, wire, palette

    # Letter F in the z = 0 plane: a vertical stem, a long top bar and a
    # shorter middle bar, each drawn in its own primary color.
    top, middle = size, size * 0.5
    long_bar, short_bar = size * 0.6, size * 0.4
    local_pts = np.array([
        [0, 0, 0],
        [0, top, 0],
        [long_bar, top, 0],
        [0, middle, 0],
        [short_bar, middle, 0],
    ])
    wire = [(0, 1), (1, 2), (3, 4)]
    palette = ['#ff0000', '#00ff00', '#0000ff']
    return local_pts + offset, wire, palette


def plot_camera_demo(obj_type, f, sx, sy, cx, cy, tx, ty, tz, roll_cam, pitch_cam, yaw_cam, o_x, o_y, o_z, obj_size, elev, azim, roll_view):
    """Draw the 3D scene and the pinhole projection of the selected object.

    Left panel: the object, the camera's optical center, a scaled-down image
    plane, the optical axis and the rays joining each vertex to the optical
    center, all in world coordinates. Right panel: the projected wireframe in
    pixel coordinates with the origin at the top-left corner.

    Args:
        obj_type: Object name forwarded to ``get_object_data``.
        f: Focal length; multiplied by ``sx`` / ``sy`` to obtain ``fx`` / ``fy``.
        sx, sy: Per-axis scale factors applied to ``f``.
        cx, cy: Principal point in pixels. The image is drawn ``2*cx`` by
            ``2*cy`` pixels, so the principal point is always its center.
        tx, ty, tz: Optical center of the camera in world coordinates.
        roll_cam, pitch_cam, yaw_cam: Camera rotation in degrees about the
            x, y and z axes (SciPy ``'xyz'`` Euler sequence).
        o_x, o_y, o_z: Object position forwarded to ``get_object_data``.
        obj_size: Object size forwarded to ``get_object_data``.
        elev, azim, roll_view: Viewing angles passed to the 3D axes'
            ``view_init``.
    """
    fig = plt.figure(figsize=(15, 7))
    vertices_w, edges, colors = get_object_data(obj_type, o_x, o_y, o_z, obj_size)
    is_letter = obj_type == "Letter F"

    # Effective focal lengths (shrunk/stretched projection)
    fx, fy = f * sx, f * sy

    # Image resolution: twice the principal point, which keeps it centered.
    res_w, res_h = 2 * cx, 2 * cy

    # --- 1. Extrinsics ---
    # cam_rot maps camera axes into the world (camera-to-world). Its transpose
    # is the inverse rotation that brings world points into the camera frame.
    cam_rot = R.from_euler('xyz', [roll_cam, pitch_cam, yaw_cam], degrees=True).as_matrix()
    cam_t = np.array([tx, ty, tz])
    R_w2c = cam_rot.T
    vertices_c = (R_w2c @ (vertices_w - cam_t).T).T

    # --- 2. Projection ---
    # Clamp the depth so vertices at or behind the camera do not divide by
    # zero or flip sign; their pixel positions are not meaningful.
    depth = np.maximum(vertices_c[:, 2], 0.01)
    projected = np.column_stack([
        fx * (vertices_c[:, 0] / depth) + cx,
        fy * (vertices_c[:, 1] / depth) + cy,
    ])

    # --- 3. 3D World View ---
    ax1 = fig.add_subplot(121, projection='3d')
    for i, edge in enumerate(edges):
        v1, v2 = vertices_w[edge[0]], vertices_w[edge[1]]
        ax1.plot([v1[0], v2[0]], [v1[1], v2[1]], [v1[2], v2[2]], color=colors[i], lw=4 if is_letter else 2)

    # Physical image plane in 3D, drawn at 1/100 of the focal length so it
    # fits inside the scene. The half-extents use fx and fy (not f) so the
    # plane's corners stay on the rays through the image corners even when
    # sx or sy differ from 1.
    viz_f = f / 100.0
    w_viz, h_viz = (cx / fx) * viz_f, (cy / fy) * viz_f
    plane_x_c, plane_y_c = np.meshgrid(np.linspace(-w_viz, w_viz, 2), np.linspace(-h_viz, h_viz, 2))
    plane_z_c = np.full_like(plane_x_c, viz_f)
    plane_pts_c = np.stack([plane_x_c.flatten(), plane_y_c.flatten(), plane_z_c.flatten()], axis=0)
    plane_pts_w = (cam_rot @ plane_pts_c).T + cam_t
    pw_x, pw_y, pw_z = [plane_pts_w[:, i].reshape(2, 2) for i in range(3)]
    ax1.plot_surface(pw_x, pw_y, pw_z, alpha=0.15, color='cyan', edgecolors='k')

    # Line from optical center through principal point (the optical axis)
    axis_end_w = cam_rot @ np.array([0, 0, 10.0]) + cam_t
    ax1.plot([cam_t[0], axis_end_w[0]], [cam_t[1], axis_end_w[1]], [cam_t[2], axis_end_w[2]], color='black', ls='--', lw=1, alpha=0.6)

    # Image origin (the plane corner for pixel (0, 0)) with arrows along the
    # camera's x and y axes expressed in world coordinates.
    origin_w = cam_rot @ np.array([-w_viz, -h_viz, viz_f]) + cam_t
    ax1.scatter(origin_w[0], origin_w[1], origin_w[2], color='black', s=30, depthshade=False)
    ax1.quiver(origin_w[0], origin_w[1], origin_w[2], cam_rot[0, 0], cam_rot[1, 0], cam_rot[2, 0], color='magenta', length=0.8, linewidth=4)
    ax1.quiver(origin_w[0], origin_w[1], origin_w[2], cam_rot[0, 1], cam_rot[1, 1], cam_rot[2, 1], color='darkviolet', length=0.8, linewidth=4)

    # Optical center
    ax1.scatter(cam_t[0], cam_t[1], cam_t[2], color='black', s=60)
    ax1.text(cam_t[0], cam_t[1], cam_t[2] - 2, "Optical\nCenter", color='black', fontsize=9, fontweight='bold', ha='center')

    # Rays from 3D object to optical center.
    for v_w in vertices_w:
        ax1.plot([cam_t[0], v_w[0]], [cam_t[1], v_w[1]], [cam_t[2], v_w[2]], color='gray', alpha=0.15, lw=1)

    # Axis settings
    ax1.set_xlim(-6, 6)
    ax1.set_ylim(-6, 6)
    ax1.set_zlim(0, 10)
    ax1.set_xlabel('World X')
    ax1.set_ylabel('World Y')
    ax1.set_zlabel('World Z')
    ax1.view_init(elev=elev, azim=azim, roll=roll_view)
    ax1.set_title("3D World Space", pad=30, fontsize=14, fontweight='bold')

    # --- 4. 2D Pixel View ---
    ax2 = fig.add_subplot(122)
    ax2.set_facecolor('#E0F7FA')

    # Show projection
    for i, edge in enumerate(edges):
        p1, p2 = projected[edge[0]], projected[edge[1]]
        ax2.plot([p1[0], p2[0]], [p1[1], p2[1]], color=colors[i], lw=6 if is_letter else 3)

    # Set image resolution limits; the inverted y-range puts (0, 0) at the top left.
    ax2.set_xlim(0, res_w)
    ax2.set_ylim(res_h, 0)

    # Crosshair through the principal point (cx, cy)
    ax2.axhline(cy, color='red', lw=1, ls='--', alpha=0.3)
    ax2.axvline(cx, color='red', lw=1, ls='--', alpha=0.3)

    # Add image origin and axis arrows (same colors as the 3D arrows).
    arrow_len = res_w * 0.15
    ax2.arrow(0, 0, arrow_len, 0, color='magenta', width=arrow_len*0.08, head_width=arrow_len*0.25, head_length=arrow_len*0.2, length_includes_head=True, clip_on=False, zorder=11)
    ax2.arrow(0, 0, 0, arrow_len, color='darkviolet', width=arrow_len*0.08, head_width=arrow_len*0.25, head_length=arrow_len*0.2, length_includes_head=True, clip_on=False, zorder=11)
    ax2.scatter(0, 0, color='black', s=120, clip_on=False, zorder=12)

    ax2.set_title(f"Virtual Image ({int(res_w)}x{int(res_h)})", pad=30, fontsize=14, fontweight='bold')
    ax2.set_xlabel('x (pixels)')
    ax2.set_ylabel('y (pixels)')
    ax2.set_aspect('equal')
    plt.tight_layout(pad=3.0)
    plt.show()

# --- Widgets --

# Header rendered above the controls.
title_widget = widgets.HTML(value="""
<h2 style="color: #ffffff; margin-top: 0px; margin-bottom: 5px; font-family: sans-serif;">
    Pinhole Camera Demo
</h2>
<p style="color: #7f8c8d; margin-top: 0px; font-family: sans-serif; margin-bottom: 15px;">
    Adjust sliders to visualize how object and camera properties affect the generated 2D image.
</p>
""")


# Object selection.
obj_drop = Dropdown(
    description='Object:',
    options=['Letter F', 'Cube'],
    value='Letter F',
)

# Intrinsics: focal length, per-axis scale factors and principal point.
f_s = FloatSlider(description='f', value=200.0, min=50, max=500, continuous_update=False)
sx_s = FloatSlider(description='sx', value=1.0, min=0.1, max=3.0, step=0.1, continuous_update=False)
sy_s = FloatSlider(description='sy', value=1.0, min=0.1, max=3.0, step=0.1, continuous_update=False)
cx_s = FloatSlider(description='cx', value=128.0, min=10, max=256, continuous_update=False)
cy_s = FloatSlider(description='cy', value=128.0, min=10, max=256, continuous_update=False)

# Extrinsics: camera position, then rotation angles in degrees.
tx_s = FloatSlider(description='Cam X', value=0, min=-5, max=5, continuous_update=False)
ty_s = FloatSlider(description='Cam Y', value=0, min=-5, max=5, continuous_update=False)
tz_s = FloatSlider(description='Cam Z', value=0, min=-5, max=5, continuous_update=False)
roll_c = IntSlider(description='Roll', value=0, min=-180, max=180, continuous_update=False)
pitch_c = IntSlider(description='Pitch', value=0, min=-180, max=180, continuous_update=False)
yaw_c = IntSlider(description='Yaw', value=0, min=-180, max=180, continuous_update=False)

# Object placement and size.
ox_s = FloatSlider(description='Obj X', value=0, min=-5, max=5, continuous_update=False)
oy_s = FloatSlider(description='Obj Y', value=0, min=-5, max=5, continuous_update=False)
oz_s = FloatSlider(description='Obj Z', value=7.0, min=3.0, max=10, continuous_update=False)
size_s = FloatSlider(description='Size', value=1.5, min=0.5, max=4, continuous_update=False)

# Viewpoint of the 3D panel.
el_s = IntSlider(description='View Elev', value=-37, min=-90, max=90, continuous_update=False)
az_s = IntSlider(description='View Azim', value=-51, min=-180, max=180, continuous_update=False)
rv_s = IntSlider(description='View Roll', value=-51, min=-180, max=180, continuous_update=False)

def reset_values(b):
    """Restore every control to the value it was created with.

    Args:
        b: Argument supplied by the button's click handler; not used.
    """
    # (widget, default) pairs, listed in the order they are assigned.
    defaults = (
        (obj_drop, 'Letter F'),
        (f_s, 200.0),
        (sx_s, 1.0),
        (sy_s, 1.0),
        (cx_s, 128.0),
        (cy_s, 128.0),
        (tx_s, 0),
        (ty_s, 0),
        (tz_s, 0),
        (roll_c, 0),
        (pitch_c, 0),
        (yaw_c, 0),
        (ox_s, 0),
        (oy_s, 0),
        (oz_s, 7.0),
        (size_s, 1.5),
        (el_s, -37),
        (az_s, -51),
        (rv_s, -51),
    )
    for control, default in defaults:
        control.value = default

# Button that puts every control back to its initial value.
reset_button = Button(
    description="Reset All to Defaults",
    button_style='warning',
    icon='refresh',
)
reset_button.on_click(reset_values)

# One collapsible section per parameter group; `titles` follows the same order.
ui_acc = Accordion(
    children=[
        VBox([obj_drop, size_s, ox_s, oy_s, oz_s]),
        VBox([f_s, sx_s, sy_s, cx_s, cy_s]),
        VBox([tx_s, ty_s, tz_s, roll_c, pitch_c, yaw_c]),
        VBox([el_s, az_s, rv_s]),
    ]
)
titles = [
    'Object Properties',
    'Intrinsics (f, sx, sy, cx, cy)',
    'Extrinsics (R, t)',
    'Visualizer Settings',
]
for i, t in enumerate(titles):
    ui_acc.set_title(i, t)

# Bind each plot_camera_demo argument to the widget that supplies it.
out = interactive_output(
    plot_camera_demo,
    dict(
        obj_type=obj_drop, f=f_s, sx=sx_s, sy=sy_s, cx=cx_s, cy=cy_s,
        tx=tx_s, ty=ty_s, tz=tz_s,
        roll_cam=roll_c, pitch_cam=pitch_c, yaw_cam=yaw_c,
        o_x=ox_s, o_y=oy_s, o_z=oz_s, obj_size=size_s,
        elev=el_s, azim=az_s, roll_view=rv_s,
    ),
)

# Stack title, reset button, controls and plot inside a bordered container.
display(
    VBox(
        [title_widget, reset_button, ui_acc, out],
        layout=widgets.Layout(
            width='fit-content',
            border='1px solid gray',
            padding='10px',
        ),
    )
)