# Dark-Speck Area Calculation (mm²) with Bilateral Filter Tuning

**What this notebook does**
- For each handsheet image and its paired LabelMe JSON, compute:
  - Sheet → `mm² per pixel` scale from known diameter
  - ROI rectangle area (px, mm²) for labels starting with `S`, `p`, `r`, or `1`
  - Dark-speck area inside each ROI via grayscale + Otsu (px, mm²)
- Save ROI crops to `Area/cropped_rois/…`
- Save masked overlays to `Area/masked_specks/…`
- Hyperparameter sweep for bilateral filter (`d`, `sigmaColor`, `sigmaSpace`)
  using a ground-truth CSV (hand-measured dark-speck area per ROI); pick the parameters that minimize the **STD of % error**.
- Write the best run rows to `speck_report_mm2_with_cropped_images.csv`.

In [None]:
from pathlib import Path
import json, csv, cv2, numpy as np, math
import pandas as pd
import os
from tqdm import tqdm

# constants
D_MM = 164.4                                    # true sheet diameter (mm)
A_SHEET_MM2 = math.pi * (D_MM / 2) ** 2         # real sheet area (mm²)

# input / output locations
folder       = Path("Area")                     # images + JSON here
crop_dir     = folder / "cropped_rois"          # ROI crops are written here
mask_dir     = folder / "masked_specks"         # speck overlays are written here
out_csv      = "speck_report_mm2_with_cropped_images.csv"

# parents=True: a missing "Area" folder must not turn into a confusing
# FileNotFoundError that names the output sub-folder.
crop_dir.mkdir(parents=True, exist_ok=True)
mask_dir.mkdir(parents=True, exist_ok=True)

COLOR      = (0, 0, 255)    # overlay colour in BGR order: red; e.g. (0, 255, 255) is yellow
ALPHA      = 0.5            # weight of the painted overlay when blended with the ROI
rows = []                                       # CSV rows (populated during runs)

## Dark-Speck Segmentation (Otsu Threshold on Gray)

Segmentation of dark specks inside each region of interest using grayscale + Otsu with an **inverse** binary threshold:
- Converts ROI to grayscale
- Chooses threshold automatically (Otsu)
- Inverts so darker specks → 255 (foreground), background → 0

Returns a **binary** `uint8` mask:
- 255 = speck pixels
- 0   = background


In [None]:
# build the binary dark-speck mask for one ROI
def mask_dark_speck(roi_bgr):
    """Return a binary mask of the dark speck pixels in a BGR region of interest.

    The ROI is converted to grayscale and thresholded with Otsu's method
    using an inverted binary threshold, so dark pixels become foreground.

    Args:
        roi_bgr: ROI image in BGR channel order (as produced by ``cv2.imread``).

    Returns:
        uint8 mask where 255 marks speck pixels and 0 marks background.
    """
    # Inverted binary output + automatic (Otsu) threshold selection.
    threshold_mode = cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    grayscale = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2GRAY)
    # Otsu chooses the threshold itself, so the `0` passed here is a placeholder;
    # cv2.threshold returns (chosen_threshold, mask) and only the mask is needed.
    speck_mask = cv2.threshold(grayscale, 0, 255, threshold_mode)[1]
    return speck_mask


## Per-Param Run (Process images, write crops/overlays, update GT)

Run one pass for a single set of bilateral-filter parameters.

Inputs
- `gt_data`: DataFrame with columns:
  - `image` (e.g., `sheet001_S1_1.png`) — identifies ROI crops to evaluate
  - `dark_px_by hand_mm2` — ground-truth speck area (mm²)
- `d`, `sigmaColor`, `sigmaSpace`: bilateral filter hyperparameters

What this cell does
- For each image + matching JSON:
  - Compute `mm² per px = A_SHEET_MM2 / (H*W)` for that image
  - For each rectangle whose label starts with `S`, `p`, `r`, or `1`, crop the ROI
  - Apply bilateral filter with provided params
  - Segment dark speck → `dark_px`, `dark_mm2`
  - Save the (bilateral-filtered) crop and the masked overlay
  - Record metrics row and **write `dark_px_by auto_mm2`** into `gt_data` for the ROI
- Returns accumulated `rows` (metrics) and the modified `gt_data`.


In [None]:
def _find_annotation(img_path):
    """Return the JSON annotation paired with *img_path*, or None if there is none."""
    exact = img_path.with_suffix(".json")
    if exact.is_file():
        return exact
    # Fall back to any JSON whose name begins with the image's stem; sort so the
    # choice does not depend on directory listing order.
    candidates = sorted(img_path.parent.glob(f"{img_path.stem}*.json"))
    return candidates[0] if candidates else None


def _clip_rectangle(points, width, height):
    """Return (x1, y1, x2, y2) ordered and clipped to the image, or None if empty."""
    (xa, ya), (xb, yb) = points[0], points[1]
    # Order the corners BEFORE clipping: a rectangle drawn right-to-left or
    # bottom-to-top must not leave a negative start index, which numpy would
    # interpret as "count from the end".
    x1, x2 = sorted((int(xa), int(xb)))
    y1, y2 = sorted((int(ya), int(yb)))
    x1, x2 = max(0, x1), min(width, x2)
    y1, y2 = max(0, y1), min(height, y2)
    if x2 <= x1 or y2 <= y1:
        return None
    return x1, y1, x2, y2


def finetuning(gt_data, d, sigmaColor, sigmaSpace):
    """Process every annotated image once with one bilateral-filter setting.

    For each image in ``folder`` that has a JSON annotation, every rectangle
    whose label starts with ``S``, ``p``, ``r`` or ``1`` and whose crop name
    (``<image stem>_<label>_<index>.png``) is listed in ``gt_data['image']``
    is cropped, bilateral-filtered and segmented with ``mask_dark_speck``.

    Side effects:
        - writes the filtered crop to ``crop_dir`` and a blended overlay to
          ``mask_dir`` (files from earlier calls are overwritten);
        - writes the measured area into ``gt_data['dark_px_by auto_mm2']``
          for the matching row (``gt_data`` is modified in place).

    Args:
        gt_data: table with an ``image`` column naming the ROI crops to
            evaluate (a pandas DataFrame in the notebook).
        d: pixel-neighbourhood diameter for ``cv2.bilateralFilter``.
        sigmaColor: colour sigma for ``cv2.bilateralFilter``.
        sigmaSpace: spatial sigma for ``cv2.bilateralFilter``.

    Returns:
        ``(rows, gt_data)`` where ``rows`` is a fresh list holding one metrics
        row per ROI processed during THIS call only, and ``gt_data`` is the
        same object that was passed in.
    """
    gt_names = set(gt_data['image'])   # O(1) membership tests inside the loops
    run_rows = []                      # per-call list: never shared between runs

    # sorted() keeps the row order reproducible across platforms
    for img_path in sorted(folder.iterdir()):
        if img_path.suffix.lower() not in (".jpg", ".jpeg", ".png"):
            continue

        json_path = _find_annotation(img_path)
        if json_path is None:
            print(f"[skip] {img_path.name} → no JSON")
            continue

        img = cv2.imread(str(img_path))
        if img is None:
            print(f"[warn] cannot read {img_path.name}")
            continue

        H, W   = img.shape[:2]
        img_px = H * W
        # scale factor for this image: the whole pixel grid is mapped onto
        # the known sheet area
        mm2_per_px = A_SHEET_MM2 / img_px

        with open(json_path) as f:
            ann = json.load(f)

        # index rectangles so filenames stay unique even if labels repeat
        rect_counter = 0

        for shp in ann.get("shapes", []):
            label = shp.get("label")
            if (
                shp.get("shape_type") != "rectangle"
                or not label
                or label[0] not in ("S", "p", "r", '1')
            ):
                continue

            # Count every qualifying rectangle, even ones skipped below, so
            # the index in the crop name stays aligned with the annotation.
            rect_counter += 1
            base_name = f"{img_path.stem}_{label}_{rect_counter}"
            crop_name = f"{base_name}.png"
            if crop_name not in gt_names:
                continue

            rect = _clip_rectangle(shp["points"], W, H)
            if rect is None:
                # An empty crop cannot be filtered or thresholded.
                print(f"[warn] {crop_name} → empty ROI after clipping, skipped")
                continue
            x1, y1, x2, y2 = rect

            roi      = img[y1:y2, x1:x2]
            roi_px   = roi.shape[0] * roi.shape[1]
            roi_mm2  = roi_px * mm2_per_px

            # bilateral filter (the tunable prefilter)
            roi = cv2.bilateralFilter(
                roi,
                d=d,                    # pixel-neighbourhood diameter
                sigmaColor=sigmaColor,  # larger ⇒ stronger colour smoothing
                sigmaSpace=sigmaSpace,
            )

            mask = mask_dark_speck(roi)

            dark_px  = int(np.count_nonzero(mask))
            dark_mm2 = dark_px * mm2_per_px

            # write back per-ROI automatic area to GT table
            gt_data.loc[gt_data['image'] == crop_name, "dark_px_by auto_mm2"] = dark_mm2

            # ── save ROI and masked speck images ───────────────────────────────
            # note: the saved crop is the filtered ROI, not the raw pixels
            cv2.imwrite(str(crop_dir / crop_name), roi)

            overlay = roi.copy()
            overlay[mask == 255] = COLOR                    # paint cluster solid colour
            viz = cv2.addWeighted(overlay, ALPHA, roi, 1-ALPHA, 0)  # blend
            cv2.imwrite(str(mask_dir / f"{base_name}_mask.png"), viz)

            # ── collect CSV row ────────────────────────────────────────────────
            run_rows.append([
                img_path.name,
                label,
                img_px,
                round(A_SHEET_MM2, 2),
                roi_px,
                round(roi_mm2, 2),
                dark_px,
                round(dark_mm2, 2),
            ])
    return run_rows, gt_data

## Parameter Search (Bilateral Filter) and Report

Strategy
- Sweep `d ∈ {5,10,15,20,25,30}`, `sigmaColor ∈ {5…40 step 5}`, `sigmaSpace ∈ {5…30 step 5}`.
- For each setting:
  - Run `finetuning(...)` (fills `dark_px_by auto_mm2` per ROI that exists in GT).
  - Compute `% error = |hand - auto| / hand * 100` per ROI on the **current GT**.
  - Track the parameter set with **lowest standard deviation of % error**.
- Write the **best** run’s `rows` to CSV and print the best params.

In [None]:
if __name__=='__main__':
    # Ground-truth table: one row per ROI crop with the hand-measured area.
    gt = pd.read_csv("GT_checking.csv")
    gt['dark_px_by auto_mm2'] = 0.0

    # bilateral-filter search grid
    ds = list(range(5, 35, 5))
    sigmaColors = list(range(5, 45, 5))
    sigmaSpaces = list(range(5, 35, 5))

    best_d = -1
    best_sigc = -1
    best_sigs = -1
    best_rows = []
    # Baseline STD to beat: a setting is only accepted if it scores lower.
    # If nothing does, the best_* values stay at -1 and best_rows stays empty.
    least_error = 10.7306852

    for d1 in tqdm(ds):
        for sigc in tqdm(sigmaColors):
            for sigs in tqdm(sigmaSpaces):
                print(f"Current params: d={d1}, sigmaColor={sigc}, sigmaSpace={sigs}")

                # Defensive: make sure the module-level `rows` accumulator is
                # empty so the rows returned for this run cannot include rows
                # from earlier parameter settings.
                rows.clear()
                run_rows, _ = finetuning(gt, d1, sigc, sigs)

                gt['error'] = np.abs(gt['dark_px_by hand_mm2'] - gt['dark_px_by auto_mm2'])/gt['dark_px_by hand_mm2'] * 100
                run_std = gt['error'].std()

                if least_error > run_std:
                    least_error = run_std
                    best_d = d1
                    best_sigc = sigc
                    best_sigs = sigs
                    # Snapshot: keep this run's rows even if the list object
                    # is reused or cleared by later runs.
                    best_rows = list(run_rows)

                print(f"Best STD: {least_error}\nBest params: d={best_d}, sigmaColor={best_sigc}, sigmaSpace={best_sigs}")

    # NOTE: the crop/overlay images on disk come from the LAST parameter set
    # that was run, not necessarily the best one; only the CSV below is
    # guaranteed to describe the best run.

    # write CSV for the best run
    with open(out_csv, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow([
            "image", "label",
            "img_px",  "img_mm2",
            "roi_px",  "roi_mm2",
            "dark_px", "dark_mm2"
        ])
        writer.writerows(best_rows)

    if best_d == -1:
        print("[warn] no parameter set beat the baseline STD; the CSV contains only the header")

    print(f"Best STD: {least_error}\nBest params: d={best_d}, sigmaColor={best_sigc}, sigmaSpace={best_sigs}")
