# DS681 Midterm Assignment
Abhinav Kumar

This notebook implements Zhang's camera calibration.
Libraries used: NumPy, SciPy, PyTorch, and Pandas; Kornia is used only for its helper functions.


In [24]:
import numpy as np
import pandas as pd
from pathlib import Path, PurePosixPath
from PIL import Image
from huggingface_hub import login
from scipy.ndimage import gaussian_filter, sobel
import os

# Log in to the Hugging Face Hub only when a token is exported in the
# environment; without one the login step is skipped.
tok = os.environ.get("HUGGINGFACE_TOKEN")
if tok:
    login(tok)

# Data locations.
SOURCE_PARQUET = "hf://datasets/pantelism/wide-camera-calibration/data/train-00000-of-00001.parquet"
OUTPUT_PARQUET = "corner_annotations.parquet"
IMAGE_BASE_DIR = "."

# Size of the corner grid annotated on each image, and the spacing that turns
# (col, row) grid indices into X/Y board coordinates.
NUM_COLS = 9
NUM_ROWS = 7
SQUARE_SIZE_M = 0.025

# Number of images to sample for a quick run; a falsy value keeps every image.
RANDOM_SUBSAMPLE = None

print("Config OK")

Config OK


In [25]:
from pathlib import Path

def get_three_points(gray, img_path, preview_dir="_previews"):
    """Save a preview of ``gray`` and read three reference points from the console.

    A PNG preview is written into ``preview_dir`` (created if missing) so the
    pixel coordinates can be looked up, then the user is prompted for the
    top-left, top-right and bottom-left points.

    Parameters
    ----------
    gray : array
        2-D image; ``gray.shape`` is unpacked as ``(height, width)``.
    img_path : str or Path
        Path of the source image. Its stem names the preview file and the full
        value is shown in the preview title.
    preview_dir : str or Path, optional
        Directory that receives ``<stem>_preview.png``.

    Returns
    -------
    list of tuple of float
        ``[(TLx, TLy), (TRx, TRy), (BLx, BLy)]``.

    Raises
    ------
    ValueError
        If an entered token cannot be parsed as a float.
    RuntimeError
        If the entered line does not contain exactly six numbers.
    """
    h, w = gray.shape
    pdir = Path(preview_dir)
    pdir.mkdir(exist_ok=True, parents=True)
    prev_path = pdir / (Path(img_path).stem + "_preview.png")

    try:
        # Imported lazily so a missing matplotlib falls through to the PIL path.
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(figsize=(8, 6))
        try:
            ax.imshow(gray, cmap="gray")
            ax.set_title(f"{img_path}\nType TLx TLy TRx TRy BLx BLy in console")
            ax.axis("on")
            fig.savefig(prev_path, dpi=120, bbox_inches="tight")
        finally:
            # Release the figure even when drawing or saving fails; otherwise it
            # stays registered with pyplot for the rest of the session.
            plt.close(fig)
    except Exception:
        # Best-effort fallback: write the raw pixels without axes or title.
        Image.fromarray(gray).save(prev_path)

    print(f"\nSaved preview → {prev_path} (width={w}, height={h})")
    s = input("Enter TLx TLy TRx TRy BLx BLy (space-separated): ").strip()
    # Commas are tolerated as separators in addition to whitespace.
    vals = [float(v) for v in s.replace(",", " ").split()]
    if len(vals) != 6:
        raise RuntimeError("Need 6 numbers.")
    TL = (vals[0], vals[1])
    TR = (vals[2], vals[3])
    BL = (vals[4], vals[5])
    return [TL, TR, BL]


In [26]:
# Folder under IMAGE_BASE_DIR that receives the image files extracted from the parquet.
EXTRACT_DIR = Path(IMAGE_BASE_DIR).joinpath("_images")
EXTRACT_DIR.mkdir(exist_ok=True, parents=True)

def load_image_table(parquet_path, out_dir):
    """Read the image parquet and return one ``(image_id, image)`` row per usable image.

    Each value of the ``image`` column (or ``filename``, which is renamed to
    ``image``) is handled according to its type:

    * dict with a ``"bytes"`` entry: the bytes are written to
      ``out_dir/<image_id><ext>`` and that path is recorded. ``<ext>`` is the
      suffix of the dict's ``"path"`` entry, or ``.jpg`` when there is none.
      If ``"bytes"`` is None, the ``"path"`` entry itself is recorded when it
      is non-empty; otherwise the row is skipped.
    * str: recorded as a path as-is.
    * anything else: the row is skipped.

    Parameters
    ----------
    parquet_path : str or Path
        Location handed to ``pd.read_parquet``.
    out_dir : str or Path
        Directory for extracted image files. It must already exist.

    Returns
    -------
    pd.DataFrame
        Columns ``image_id`` and ``image`` (both strings). ``image_id`` comes
        from the table's ``image_id`` column when present, otherwise it is
        ``img_<row number, zero-padded to 4 digits>``.

    Raises
    ------
    ValueError
        If the table has neither an ``image`` nor a ``filename`` column.
    """
    out_dir = Path(out_dir)  # accept plain strings as well as Path objects
    df = pd.read_parquet(parquet_path)
    if "image" not in df.columns and "filename" in df.columns:
        df = df.rename(columns={"filename": "image"})
    if "image" not in df.columns:
        raise ValueError(f"Need an 'image' or 'filename' column. Got: {sorted(df.columns)}")

    ids, paths = [], []
    for i, row in df.reset_index(drop=True).iterrows():
        img = row["image"]
        img_id = row["image_id"] if "image_id" in row else f"img_{i:04d}"

        if isinstance(img, dict) and "bytes" in img:
            # dict.get only uses its default when the key is absent, so a
            # present-but-None "path" has to be normalised explicitly.
            src_path = img.get("path") or ""
            payload = img["bytes"]
            if payload is None:
                # Nothing embedded: fall back to the referenced file if one is named.
                if not src_path:
                    continue
                ids.append(str(img_id))
                paths.append(str(Path(src_path)))
                continue
            ext = PurePosixPath(src_path).suffix or ".jpg"
            p = out_dir / f"{img_id}{ext}"
            with open(p, "wb") as f:
                f.write(payload)
            ids.append(str(img_id))
            paths.append(str(p))
        elif isinstance(img, str):
            ids.append(str(img_id))
            paths.append(str(Path(img)))
        else:
            # Unsupported cell type: leave the row out of the result.
            continue

    return pd.DataFrame({"image_id": ids, "image": paths})

# Build the image table; when RANDOM_SUBSAMPLE is set, keep only a fixed-seed
# random subset of at most that many rows.
df_images = load_image_table(SOURCE_PARQUET, out_dir=EXTRACT_DIR)
if RANDOM_SUBSAMPLE:
    df_images = (
        df_images
        .sample(n=min(RANDOM_SUBSAMPLE, len(df_images)), random_state=42)
        .reset_index(drop=True)
    )

print(f"Loaded {len(df_images)} images → {EXTRACT_DIR}")
df_images.head()

Loaded 44 images → _images


Unnamed: 0,image_id,image
0,img_0000,_images/img_0000.jpg
1,img_0001,_images/img_0001.jpg
2,img_0002,_images/img_0002.jpg
3,img_0003,_images/img_0003.jpg
4,img_0004,_images/img_0004.jpg


In [27]:
def harris_response(gray, k=0.04, sigma=1.5):
    """Compute the Harris corner response ``det(S) - k * trace(S)**2`` per pixel.

    ``S`` is the 2x2 structure tensor built from Sobel gradients whose
    products are smoothed with a Gaussian of standard deviation ``sigma``.

    Parameters
    ----------
    gray : np.ndarray
        2-D image. It is cast to float32 and divided by 255 before filtering.
    k : float, optional
        Weight of the squared trace term.
    sigma : float, optional
        Standard deviation of the Gaussian applied to the gradient products.

    Returns
    -------
    np.ndarray
        Response map with the same shape as ``gray``.
    """
    img = gray.astype(np.float32) / 255.0

    # axis=1 differentiates along columns (x), axis=0 along rows (y).
    grad_x = sobel(img, axis=1)
    grad_y = sobel(img, axis=0)

    # Smoothed entries of the structure tensor.
    s_xx, s_yy, s_xy = (
        gaussian_filter(product, sigma=sigma)
        for product in (grad_x * grad_x, grad_y * grad_y, grad_x * grad_y)
    )

    determinant = s_xx * s_yy - s_xy * s_xy
    trace = s_xx + s_yy
    return determinant - k * (trace ** 2)

def local_max_in_window(resp, cx, cy, rad=7):
    """Find the strongest response inside a square window around ``(cx, cy)``.

    The window spans ``rad`` pixels on each side of the rounded centre and is
    clipped to the bounds of ``resp``.

    Parameters
    ----------
    resp : np.ndarray
        2-D response map indexed as ``resp[y, x]``.
    cx, cy : float
        Window centre in x (column) and y (row).
    rad : int, optional
        Half-width of the window in pixels.

    Returns
    -------
    tuple
        ``(x, y)`` of the window maximum, or the rounded centre when the
        clipped window is empty.
    """
    height, width = resp.shape
    col_c, row_c = int(round(cx)), int(round(cy))

    left, right = max(col_c - rad, 0), min(col_c + rad + 1, width)
    top, bottom = max(row_c - rad, 0), min(row_c + rad + 1, height)

    # Nothing to search when the centre lies too far outside the map.
    if left >= right or top >= bottom:
        return col_c, row_c

    patch = resp[top:bottom, left:right]
    d_row, d_col = np.unravel_index(np.argmax(patch), patch.shape)
    return left + d_col, top + d_row

def subpixel_quadratic(resp, x, y):
    """Refine a peak location to sub-pixel precision with a quadratic fit.

    A surface ``a*dx^2 + b*dy^2 + c*dx*dy + d*dx + e*dy + f`` is fitted by
    least squares to the 3x3 neighbourhood of the (border-clamped) integer
    position, and the position is shifted to the surface's stationary point.
    The shift is applied only when the fitted surface is a well-conditioned
    maximum; otherwise the integer position is returned unchanged.

    Parameters
    ----------
    resp : np.ndarray
        2-D response map indexed as ``resp[y, x]``.
    x, y : int or float
        Starting position. Each coordinate is clamped so that the 3x3
        neighbourhood stays inside ``resp``.

    Returns
    -------
    tuple of float
        Refined ``(x, y)``. Each offset is limited to ``[-1, 1]`` pixels.
    """
    h, w = resp.shape
    xi = int(np.clip(x, 1, w - 2))
    yi = int(np.clip(y, 1, h - 2))
    patch = resp[yi - 1:yi + 2, xi - 1:xi + 2]

    xs, ys = np.meshgrid([-1, 0, 1], [-1, 0, 1])
    xs, ys = xs.ravel(), ys.ravel()
    design = np.column_stack([xs ** 2, ys ** 2, xs * ys, xs, ys, np.ones(9)])

    offx = offy = 0.0
    try:
        a, b, c, d, e, _ = np.linalg.lstsq(design, patch.ravel(), rcond=None)[0]
        hessian = np.array([[2 * a, c], [c, 2 * b]], dtype=np.float64)
        # The stationary point is a maximum only for a negative-definite
        # Hessian; for a saddle or a minimum it is not a peak, so no shift.
        is_peak = a < 0 and (4 * a * b - c * c) > 0
        if is_peak and np.linalg.cond(hessian) < 1e4:
            off = np.linalg.solve(hessian, np.array([-d, -e], dtype=np.float64))
            offx = float(np.clip(off[0], -1, 1))
            offy = float(np.clip(off[1], -1, 1))
    except np.linalg.LinAlgError:
        # Fit or solve failed (e.g. a neighbourhood smaller than 3x3): keep
        # the integer position. Other exceptions are left to propagate.
        offx = offy = 0.0
    return xi + offx, yi + offy


In [28]:
def propose_grid_from_three_clicks(num_cols, num_rows, clicks):
    """Lay out a regular grid of points from three reference points.

    The top-left point is the grid origin. The top-right and bottom-left
    points define the per-column and per-row step vectors.

    Parameters
    ----------
    num_cols, num_rows : int
        Number of grid points along each direction.
    clicks : sequence
        ``[(TLx, TLy), (TRx, TRy), (BLx, BLy)]``.

    Returns
    -------
    np.ndarray
        float32 array with one ``[x, y, col, row]`` entry per grid point,
        ordered row by row.
    """
    (tl_x, tl_y), (tr_x, tr_y), (bl_x, bl_y) = clicks
    origin = np.array([tl_x, tl_y], np.float32)
    # max(..., 1) avoids dividing by zero for a single column or row.
    step_col = (np.array([tr_x, tr_y], np.float32) - origin) / max(num_cols - 1, 1)
    step_row = (np.array([bl_x, bl_y], np.float32) - origin) / max(num_rows - 1, 1)

    def _grid_point(c, r):
        p = origin + c * step_col + r * step_row
        return [p[0], p[1], c, r]

    grid = [_grid_point(c, r) for r in range(num_rows) for c in range(num_cols)]
    return np.array(grid, np.float32)

In [29]:
def semi_auto_annotate_image(img_path, num_cols, num_rows, search_rad=7, k=0.04, sigma=1.5, show=True):
    """Annotate the corner grid of one image from three user-supplied points.

    Steps: load the image as 8-bit grayscale, ask for the three reference
    points, lay out an initial grid from them, then move each grid point to
    the strongest Harris response within ``search_rad`` pixels and refine it
    to sub-pixel precision.

    Parameters
    ----------
    img_path : str or Path
        Image file to annotate.
    num_cols, num_rows : int
        Grid size passed to the initial grid layout.
    search_rad : int, optional
        Half-width of the search window around each initial point.
    k, sigma : float, optional
        Harris response parameters.
    show : bool, optional
        When true, plot initial and refined points over the image.

    Returns
    -------
    pd.DataFrame
        Columns ``u``, ``v``, ``col``, ``row``. All four are float32 because
        they are stored in a single float32 array.
    """
    # The context manager closes the file as soon as the pixels are copied out.
    with Image.open(img_path) as im:
        gray = np.array(im.convert("L"), dtype=np.uint8)

    clicks = get_three_points(gray, img_path)  # [(TLx,TLy),(TRx,TRy),(BLx,BLy)]
    init = propose_grid_from_three_clicks(num_cols, num_rows, clicks)

    resp = harris_response(gray, k=k, sigma=sigma)
    refined = []
    for (u0, v0, c, r) in init:
        xi, yi = local_max_in_window(resp, u0, v0, rad=search_rad)
        ux, vy = subpixel_quadratic(resp, xi, yi)
        refined.append([ux, vy, int(c), int(r)])
    refined = np.array(refined, np.float32)

    if show:
        try:
            import matplotlib.pyplot as plt
            plt.figure(figsize=(10, 8))
            plt.imshow(gray, cmap='gray')
            plt.scatter(init[:, 0], init[:, 1], s=10, label='init')
            plt.scatter(refined[:, 0], refined[:, 1], s=12, label='refined')
            # One segment per point, from its initial to its refined position.
            for (u0, v0, _, _), (ux, vy, _, _) in zip(init, refined):
                plt.plot([u0, ux], [v0, vy])
            plt.legend()
            plt.show()
        except Exception as exc:
            # Plotting is optional, so the annotation is still returned, but
            # the failure is reported instead of being silently dropped.
            print(f"[WARN] overlay plot skipped for {img_path}: {exc}")

    return pd.DataFrame(refined, columns=["u", "v", "col", "row"])


In [30]:
def semi_auto_annotate_images(df_images, image_base_dir, num_cols, num_rows, square_size_m,
                              out_parquet="corner_annotations.parquet", search_rad=7, k=0.04, sigma=1.5):
    """Interactively annotate every image in ``df_images`` and save the accepted results.

    For each row the image is annotated, the user is asked to accept or
    reject the result, and accepted corners get ``image_id`` plus ``X``/``Y``
    columns (``col``/``row`` multiplied by ``square_size_m``). Missing files
    are skipped with a warning. An error on one image is printed and the loop
    moves on to the next image.

    Parameters
    ----------
    df_images : pd.DataFrame
        Needs ``image_id`` and ``image`` columns.
    image_base_dir : str or Path
        Directory that each ``image`` value is joined onto.
    num_cols, num_rows : int
        Grid size forwarded to the per-image annotator.
    square_size_m : float
        Spacing that converts grid indices into ``X``/``Y``.
    out_parquet : str, optional
        Destination of the combined annotation table.
    search_rad, k, sigma : optional
        Forwarded to the per-image annotator.

    Returns
    -------
    pd.DataFrame
        All accepted annotations concatenated. The same table is written to
        ``out_parquet``.

    Raises
    ------
    RuntimeError
        If no annotation was accepted.
    """
    accepted = []
    for _, record in df_images.iterrows():
        image_id = record["image_id"]
        img_path = (Path(image_base_dir) / record["image"]).resolve()
        if not img_path.exists():
            print(f"[WARN] missing {img_path}, skip")
            continue

        print(f"\nImage: {img_path}")
        try:
            corners = semi_auto_annotate_image(
                str(img_path), num_cols, num_rows,
                search_rad=search_rad, k=k, sigma=sigma, show=True,
            )
            verdict = input("Accept? [y/n]: ").strip().lower()
            if verdict != "y":
                print("Discarded.")
                continue
            scale = float(square_size_m)
            accepted.append(
                corners.assign(
                    image_id=image_id,
                    X=corners["col"] * scale,
                    Y=corners["row"] * scale,
                )
            )
            print("Saved.")
        except Exception as exc:
            print(f"[ERROR] {img_path}: {exc}")

    if len(accepted) == 0:
        raise RuntimeError("No accepted annotations.")

    out = pd.concat(accepted, ignore_index=True)
    out.to_parquet(out_parquet, index=False)
    print(f"Wrote {len(out)} rows to {out_parquet}")
    return out


In [32]:
# Run the interactive annotation pass over the image table. The result is
# written to OUTPUT_PARQUET, so the returned frame is discarded here.
# search_rad and sigma override the function defaults (7 and 1.5).
_ = semi_auto_annotate_images(
    df_images,
    IMAGE_BASE_DIR,
    NUM_COLS,
    NUM_ROWS,
    SQUARE_SIZE_M,
    out_parquet=OUTPUT_PARQUET,
    search_rad=9,
    k=0.04,
    sigma=1.2,
)



Image: /workspaces/eng-ai-agents/assignments/midterm/_images/img_0000.jpg

Saved preview → _previews/img_0000_preview.png (width=1280, height=960)
Saved.

Image: /workspaces/eng-ai-agents/assignments/midterm/_images/img_0001.jpg

Saved preview → _previews/img_0001_preview.png (width=1280, height=960)
Saved.

Image: /workspaces/eng-ai-agents/assignments/midterm/_images/img_0002.jpg

Saved preview → _previews/img_0002_preview.png (width=1280, height=960)
Saved.

Image: /workspaces/eng-ai-agents/assignments/midterm/_images/img_0003.jpg

Saved preview → _previews/img_0003_preview.png (width=1280, height=960)
Saved.

Image: /workspaces/eng-ai-agents/assignments/midterm/_images/img_0004.jpg

Saved preview → _previews/img_0004_preview.png (width=1280, height=960)
Saved.

Image: /workspaces/eng-ai-agents/assignments/midterm/_images/img_0005.jpg

Saved preview → _previews/img_0005_preview.png (width=1280, height=960)
Saved.

Image: /workspaces/eng-ai-agents/assignments/midterm/_images/img_0006

In [33]:
# Read the saved annotations back from disk to check what was written.
df = pd.read_parquet(path=OUTPUT_PARQUET)
print(df.shape)
df.head(n=5)


(2772, 7)


Unnamed: 0,u,v,col,row,image_id,X,Y
0,358.537445,62.573559,0.0,0.0,img_0000,0.0,0.0
1,441.291351,71.680275,1.0,0.0,img_0000,0.025,0.0
2,540.188843,105.818779,2.0,0.0,img_0000,0.05,0.0
3,628.62561,110.238838,3.0,0.0,img_0000,0.075,0.0
4,718.764587,128.449692,4.0,0.0,img_0000,0.1,0.0
