In [8]:
#   IMAGE_PATH, ROI, ANNOTATION_PATH, OUTPUT_DIR
#IMAGE_PATH = "./images/ncr_8338d_mcmaster_mz_mit20x2.jpg"       
IMAGE_PATH = "./images/die_flat.jpg"
ROI = (2112, 2400, 7600, 5740)  # <-- EDIT to your test region
ANNOTATION_PATH = "./mioc_annotated_netlist.txt" 
CSV_PATH    = "./color_sample_lab.csv"  
OUTPUT_DIR = "./output-poly-mask-extract"
#TEMPLATE_PATHS = ["./images-corner/NE_1.png",
#                  "./images-corner/SE_1.png",
#                  "./images-corner/NW_1.png",
#                  "./images-corner/SW_1.png"
#                  ]
#(any filenames ok; sizes can vary; apex assumed at template center)

CLASS_FILTER = "poly"   # only use rows whose first token/column equals this (case-insensitive)


In [12]:
import re

def read_poly_samples(csv_path: str, class_filter: str = None):
    """
    Robust reader for Lab/RGB triples with optional leading class label.
    Keeps only rows where the first cell matches class_filter (e.g., "poly"), if provided.
    Accepts either Lab (L,a,b) or RGB (R,G,B). Auto-converts to OpenCV 8-bit Lab.
    """
    def normalize_header(name: str) -> str:
        return re.sub(r'[^a-z0-9]+', '', name.lower())

    def extract_float(cell: str):
        m = re.search(r'[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?', cell)
        return float(m.group(0)) if m else None

    with open(csv_path, "r", encoding="utf-8-sig", errors="ignore") as f:
        raw_lines = [ln.rstrip("\n") for ln in f]

    # strip blanks/comments
    lines = [ln for ln in raw_lines if ln.strip() and not ln.lstrip().startswith(("#","//"))]
    if not lines:
        raise ValueError("CSV appears empty after cleaning.")

    # detect delimiter on first line
    first = lines[0]
    delims = [",",";","\t","|"]
    delim = max(delims, key=lambda d: first.count(d)) if any(first.count(d) for d in delims) else ","

    rows = [ln.split(delim) for ln in lines]
    headers = [normalize_header(h) for h in rows[0]]
    looks_header = any(h in ("l","a","b","lab_l","lab_a","lab_b","r","g","b","red","green","blue","class") for h in headers)

    data_rows = rows[1:] if looks_header else rows

    # optional class filter (first column)
    if class_filter:
        cf = class_filter.strip().lower()
        def is_kept(r):
            return len(r)>0 and r[0].strip().lower()==cf
        data_rows = [r for r in data_rows if is_kept(r)]
        if not data_rows:
            raise ValueError(f"No rows match class_filter='{class_filter}'. Check the first column labels.")

    # map columns (prefer named Lab/RGB; else AUTO = first three numeric cells after the class label if present)
    mode = "auto"; cols = None
    if looks_header:
        hmap = {headers[i]: i for i in range(len(headers))}
        for trio in (("l","a","b"), ("lab_l","lab_a","lab_b")):
            if all(k in hmap for k in trio):
                cols = (hmap[trio[0]], hmap[trio[1]], hmap[trio[2]]); mode = "lab"; break
        if cols is None:
            for trio in (("r","g","b"), ("red","green","blue")):
                if all(k in hmap for k in trio):
                    cols = (hmap[trio[0]], hmap[trio[1]], hmap[trio[2]]); mode = "rgb"; break

    triples = []
    for r in data_rows:
        if mode in ("lab","rgb") and cols is not None:
            c0,c1,c2 = cols
            vals = [extract_float(r[c0] if c0 < len(r) else ""),
                    extract_float(r[c1] if c1 < len(r) else ""),
                    extract_float(r[c2] if c2 < len(r) else "")]
        else:
            # AUTO: skip the first column if it’s a class label, then grab first three numeric cells
            start = 1 if (class_filter and len(r)>0) else 0
            nums = []
            for cell in r[start:]:
                v = extract_float(cell)
                if v is not None:
                    nums.append(v)
                if len(nums) == 3:
                    break
            vals = nums if len(nums)==3 else [None,None,None]
        if None not in vals:
            triples.append(vals)

    if not triples:
        raise ValueError("No valid numeric triples after filtering. Check CSV format.")

    arr = np.array(triples, dtype=float)

    # Convert to OpenCV 8-bit Lab
    if mode == "rgb":
        return rgb_to_lab(arr)
    else:
        return coerce_lab_array(arr)

# In main(), call with the filter:
# samples_lab = read_poly_samples(CSV_PATH, class_filter=CLASS_FILTER)


#### Simple color based metal extract (near LAB-poly filtered out via raster scan)

In [15]:
# ===============================[ GLOBALS / CONFIG CELL ]===============================
# Set these three before running:
#CSV_PATH    = "/path/to/color_sample_lab.csv"     # e.g., "/mnt/data/color_sample_lab-Copy1.csv"
#IMAGE_PATH  = "/path/to/die_image.png"
#OUTPUT_DIR  = "./output_poly_mask"

# --- Band selection config ---
# Choose how to compute Lab channel limits from your sample CSV.
#BAND_METHOD       = "percentile"   # "percentile" or "std"
BAND_METHOD       = "std"   # "percentile" or "std"

PERCENTILES_AB    = (1.0, 99.0)    # used if BAND_METHOD == "percentile"
PERCENTILES_L     = (1.0, 99.0)

# More precision (fewer false positives) → tighten to (10,90).
# More recall (fewer holes) → relax to (1,99) or disable the L gate.

K_STD_AB          = 2.5            # used if BAND_METHOD == "std"
K_STD_L           = 2.0

USE_L_GATE        = True           # also gate L* channel (recommended if substrate is similar in chroma)
CLIP_L_BOUNDS     = (0, 255)       # cv2 Lab uses 8-bit L in [0,255]
CLIP_A_B_BOUNDS   = (0, 255)       # cv2 Lab uses 8-bit a,b in [0,255]

# --- Mask & output options ---
SAVE_OVERLAY      = True           # writes an RGB overlay (image dimmed + green where poly detected)
SAVE_DEBUG_PLOTS  = False          # quick histograms of L,a,b samples (requires matplotlib)
OVERLAY_ALPHA     = 0.6            # 0..1; lower = darker background
MASK_FILENAME     = "poly_mask.png"
OVERLAY_FILENAME  = "poly_overlay.png"
BAND_JSON_FILENAME= "poly_lab_band.json"

# Optional large-image tiling (None disables tiling)
TILE_SIZE         = None           # e.g., (2048, 2048)
TILE_OVERLAP      = 32

# ===============================[ IMPORTS ]============================================
import os
import json
import csv
import math
import numpy as np
import cv2

# Make directory if it doesnt exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- Remove all files in OUTPUT_DIR ---
for fname in os.listdir(OUTPUT_DIR):
    fpath = os.path.join(OUTPUT_DIR, fname)
    if os.path.isfile(fpath):
        os.remove(fpath)

# Optional plotting (only used if SAVE_DEBUG_PLOTS=True)
if SAVE_DEBUG_PLOTS:
    import matplotlib.pyplot as plt

# ===============================[ UTILS CELL ]=========================================
def ensure_dir(path: str):
    os.makedirs(path, exist_ok=True)

'''
def read_poly_samples(csv_path: str):
    """
    Reads color samples from CSV. Accepts either Lab or RGB columns.
    - Preferred headers (case-insensitive): L, A, B
    - Alternate RGB headers: R, G, B  (will be converted to Lab using OpenCV)
    Returns: samples_lab as np.ndarray of shape (N, 3) in OpenCV Lab 8-bit space.
    """
    # Load rows
    with open(csv_path, "r", newline="") as f:
        sniffer = csv.Sniffer()
        sample = f.read(4096)
        f.seek(0)
        has_header = sniffer.has_header(sample)
        dialect = sniffer.sniff(sample)
        reader = csv.reader(f, dialect)
        headers = None

        rows = []
        if has_header:
            headers = next(reader, None)
            if headers is None:
                raise ValueError("CSV appears empty.")
            headers = [h.strip().lower() for h in headers]
            for r in reader:
                if any(cell.strip() for cell in r):
                    rows.append(r)
        else:
            for r in reader:
                if any(cell.strip() for cell in r):
                    rows.append(r)

    if headers is None:
        # Try to infer by count: default to first three columns as L,a,b
        if len(rows) == 0:
            raise ValueError("No rows found in CSV.")
        data = np.array(rows, dtype=float)
        if data.shape[1] < 3:
            raise ValueError("CSV must have at least 3 numeric columns for L,a,b or R,G,B.")
        lab = data[:, :3]
        return coerce_lab_array(lab)
    else:
        # Map columns
        h = {name: idx for idx, name in enumerate(headers)}
        # Try Lab first
        if all(k in h for k in ("l", "a", "b")):
            lab_rows = []
            for r in rows:
                try:
                    L = float(r[h["l"]]); A = float(r[h["a"]]); B = float(r[h["b"]])
                    lab_rows.append([L, A, B])
                except Exception:
                    continue
            if len(lab_rows) == 0:
                raise ValueError("No valid L,a,b rows parsed from CSV.")
            return coerce_lab_array(np.array(lab_rows, dtype=float))

        # Try RGB fallback
        if all(k in h for k in ("r", "g", "b")):
            rgb_rows = []
            for r in rows:
                try:
                    R = float(r[h["r"]]); G = float(r[h["g"]]); B = float(r[h["b"]])
                    rgb_rows.append([R, G, B])
                except Exception:
                    continue
            if len(rgb_rows) == 0:
                raise ValueError("No valid R,G,B rows parsed from CSV.")
            return rgb_to_lab(np.array(rgb_rows, dtype=float))

        raise ValueError("CSV headers must include either (L,A,B) or (R,G,B). Found: {}".format(headers))
'''

def coerce_lab_array(lab_float: np.ndarray):
    """
    Coerce Lab values into OpenCV's 8-bit Lab domain:
      - cv2.cvtColor with CV_8U expects Lab channels in [0,255] (OpenCV's scaled variant).
    If values look like standard Lab (L in [0,100], a,b ~ [-128,127]), map to OpenCV range.
    Otherwise, if already in 0..255, clip and cast.
    """
    lab = lab_float.copy()
    L = lab[:, 0]
    # Heuristics: if median L is <= 100, assume standard CIE L* in [0..100]
    if np.nanmedian(L) <= 100.0:
        # Map: L* 0..100 -> 0..255 ; a*,b* -128..127 -> 0..255
        lab[:, 0] = np.clip((lab[:, 0] / 100.0) * 255.0, 0, 255)
        lab[:, 1] = np.clip(lab[:, 1] + 128.0, 0, 255)
        lab[:, 2] = np.clip(lab[:, 2] + 128.0, 0, 255)
    else:
        # Assume already OpenCV-scaled
        lab[:, 0] = np.clip(lab[:, 0], 0, 255)
        lab[:, 1] = np.clip(lab[:, 1], 0, 255)
        lab[:, 2] = np.clip(lab[:, 2], 0, 255)
    return lab.astype(np.uint8)

def rgb_to_lab(rgb_float: np.ndarray):
    """
    Convert an array of RGB floats (0..255 or 0..1) to OpenCV 8-bit Lab (0..255 per channel).
    """
    rgb = rgb_float.copy().astype(np.float32)
    if np.nanmax(rgb) <= 1.0:
        rgb *= 255.0
    # build small image, convert with cv2
    img = rgb.reshape(-1, 1, 3).astype(np.uint8)
    # OpenCV uses BGR by default; convert RGB->BGR first
    bgr = img[:, :, ::-1]
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    return lab.reshape(-1, 3)

def compute_lab_band(samples_lab_u8: np.ndarray,
                     method: str = "percentile",
                     p_ab = (1.0, 99.0),
                     p_l  = (1.0, 99.0),
                     k_std_ab: float = 2.5,
                     k_std_l: float  = 2.0,
                     use_l_gate: bool = True,
                     clip_l = (0, 255),
                     clip_ab = (0, 255)):
    """
    Compute per-channel inclusive low/high bounds in OpenCV Lab space.
    Returns: (lo, hi) where each is np.array([L_lo, a_lo, b_lo], dtype=uint8)
    """
    s = samples_lab_u8.astype(np.float32)
    Ls, As, Bs = s[:,0], s[:,1], s[:,2]

    if method == "percentile":
        a_lo, a_hi = np.nanpercentile(As, [p_ab[0], p_ab[1]])
        b_lo, b_hi = np.nanpercentile(Bs, [p_ab[0], p_ab[1]])
        if use_l_gate:
            L_lo, L_hi = np.nanpercentile(Ls, [p_l[0], p_l[1]])
        else:
            L_lo, L_hi = clip_l[0], clip_l[1]

    elif method == "std":
        a_mu, a_sd = np.nanmean(As), np.nanstd(As)
        b_mu, b_sd = np.nanmean(Bs), np.nanstd(Bs)
        a_lo, a_hi = a_mu - k_std_ab * a_sd, a_mu + k_std_ab * a_sd
        b_lo, b_hi = b_mu - k_std_ab * b_sd, b_mu + k_std_ab * b_sd
        if use_l_gate:
            L_mu, L_sd = np.nanmean(Ls), np.nanstd(Ls)
            L_lo, L_hi = L_mu - k_std_l * L_sd, L_mu + k_std_l * L_sd
        else:
            L_lo, L_hi = clip_l[0], clip_l[1]
    else:
        raise ValueError("Unknown BAND_METHOD: {}".format(method))

    lo = np.array([
        np.clip(L_lo, clip_l[0], clip_l[1]),
        np.clip(a_lo, clip_ab[0], clip_ab[1]),
        np.clip(b_lo, clip_ab[0], clip_ab[1]),
    ], dtype=np.uint8)

    hi = np.array([
        np.clip(L_hi, clip_l[0], clip_l[1]),
        np.clip(a_hi, clip_ab[0], clip_ab[1]),
        np.clip(b_hi, clip_ab[0], clip_ab[1]),
    ], dtype=np.uint8)

    return lo, hi

def load_image_lab_u8(image_path: str):
    """
    Load an image with OpenCV and return its Lab (8-bit) representation:
    - cv2.imread -> BGR
    - BGR -> Lab (OpenCV scaled: all three channels 0..255)
    """
    img_bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img_bgr is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img_lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
    return img_bgr, img_lab

def mask_from_band_lab(img_lab_u8: np.ndarray, lo: np.ndarray, hi: np.ndarray):
    """
    Inclusive axis-aligned band check in Lab:
      keep pixel if lo[c] <= img[c] <= hi[c] for c in {L,a,b}
    Returns a uint8 binary mask (0 or 255).
    """
    mask = cv2.inRange(img_lab_u8, lo, hi)
    return mask

def overlay_mask(img_bgr: np.ndarray, mask_u8: np.ndarray, alpha: float = 0.6):
    """
    Create an overlay: dim the base image and paint detected mask regions in green.
    """
    # Dim base
    base = (img_bgr.astype(np.float32) * alpha).astype(np.uint8)
    # Prepare green layer
    green = np.zeros_like(base)
    green[..., 1] = 255  # pure green

    m3 = cv2.merge([mask_u8, mask_u8, mask_u8])
    # Where mask is on, show green; else show dimmed base
    overlay = np.where(m3 > 0, green, base)
    return overlay

def save_debug_plots(samples_lab_u8: np.ndarray, lo: np.ndarray, hi: np.ndarray, out_dir: str):
    if not SAVE_DEBUG_PLOTS:
        return
    ensure_dir(out_dir)
    Ls, As, Bs = samples_lab_u8[:,0], samples_lab_u8[:,1], samples_lab_u8[:,2]
    plt.figure()
    plt.hist(Ls, bins=64); plt.title(f"L* (lo={lo[0]}, hi={hi[0]})")
    plt.savefig(os.path.join(out_dir, "hist_L.png")); plt.close()

    plt.figure()
    plt.hist(As, bins=64); plt.title(f"a* (lo={lo[1]}, hi={hi[1]})")
    plt.savefig(os.path.join(out_dir, "hist_a.png")); plt.close()

    plt.figure()
    plt.hist(Bs, bins=64); plt.title(f"b* (lo={lo[2]}, hi={hi[2]})")
    plt.savefig(os.path.join(out_dir, "hist_b.png")); plt.close()

# ===============================[ (OPTIONAL) TILING CELL ]==============================
def process_tiled(img_lab_u8: np.ndarray, lo: np.ndarray, hi: np.ndarray,
                  tile_size=(2048, 2048), overlap=32):
    """
    Axis-aligned Lab banding over tiles (useful for huge images).
    Returns full-size mask (uint8 0/255).
    """
    H, W = img_lab_u8.shape[:2]
    th, tw = tile_size
    out = np.zeros((H, W), dtype=np.uint8)

    y = 0
    while y < H:
        y0 = max(0, y - overlap)
        y1 = min(H, y + th + overlap)
        x = 0
        while x < W:
            x0 = max(0, x - overlap)
            x1 = min(W, x + tw + overlap)
            tile = img_lab_u8[y0:y1, x0:x1, :]

            tmask = mask_from_band_lab(tile, lo, hi)

            # paste only the core region (exclude overlaps) to avoid double-writing seams
            core_y0 = y if y == 0 else y
            core_y1 = min(y + th, H)
            core_x0 = x if x == 0 else x
            core_x1 = min(x + tw, W)

            # translate core coords to tile coords
            ty0 = core_y0 - y0
            ty1 = core_y1 - y0
            tx0 = core_x0 - x0
            tx1 = core_x1 - x0

            out[core_y0:core_y1, core_x0:core_x1] = tmask[ty0:ty1, tx0:tx1]
            x += tw
        y += th
    return out

# ===============================[ MAIN CELL ]===========================================
def main():
    ensure_dir(OUTPUT_DIR)

    # 1) Read sample CSV (Lab preferred; RGB supported)
    # samples_lab = read_poly_samples(CSV_PATH)  # OpenCV-scaled Lab (uint8)
    samples_lab = read_poly_samples(CSV_PATH, class_filter=CLASS_FILTER)

    # 2) Compute Lab band (low/high per channel)
    lo, hi = compute_lab_band(
        samples_lab,
        method=BAND_METHOD,
        p_ab=PERCENTILES_AB,
        p_l=PERCENTILES_L,
        k_std_ab=K_STD_AB,
        k_std_l=K_STD_L,
        use_l_gate=USE_L_GATE,
        clip_l=CLIP_L_BOUNDS,
        clip_ab=CLIP_A_B_BOUNDS
    )

    # Persist band values
    band_info = {
        "method": BAND_METHOD,
        "use_L_gate": USE_L_GATE,
        "lo": {"L": int(lo[0]), "a": int(lo[1]), "b": int(lo[2])},
        "hi": {"L": int(hi[0]), "a": int(hi[1]), "b": int(hi[2])},
        "percentiles_ab": PERCENTILES_AB,
        "percentiles_L": PERCENTILES_L,
        "k_std_ab": K_STD_AB,
        "k_std_L": K_STD_L
    }
    with open(os.path.join(OUTPUT_DIR, BAND_JSON_FILENAME), "w") as f:
        json.dump(band_info, f, indent=2)

    # Optional quick visual sanity: histograms
    save_debug_plots(samples_lab, lo, hi, OUTPUT_DIR)

    # 3) Load image and convert to OpenCV Lab
    img_bgr, img_lab = load_image_lab_u8(IMAGE_PATH)

    # 4) Raster walk (per-pixel Lab banding) → mask
    if TILE_SIZE is None:
        mask = mask_from_band_lab(img_lab, lo, hi)
    else:
        mask = process_tiled(img_lab, lo, hi, tile_size=TILE_SIZE, overlap=TILE_OVERLAP)

    # 5) Save mask and overlay
    mask_path = os.path.join(OUTPUT_DIR, MASK_FILENAME)
    cv2.imwrite(mask_path, mask)

    if SAVE_OVERLAY:
        overlay = overlay_mask(img_bgr, mask, alpha=OVERLAY_ALPHA)
        overlay_path = os.path.join(OUTPUT_DIR, OVERLAY_FILENAME)
        cv2.imwrite(overlay_path, overlay)

    # 6) Console summary
    H, W = mask.shape
    on = int((mask > 0).sum())
    frac = on / (H * W + 1e-9)
    print("Poly Lab band:", band_info)
    print(f"Mask saved: {mask_path}  (on-pixels={on}, fraction={frac:.6f})")
    if SAVE_OVERLAY:
        print(f"Overlay saved: {overlay_path}")

if __name__ == "__main__":
    main()

Poly Lab band: {'method': 'std', 'use_L_gate': True, 'lo': {'L': 174, 'a': 116, 'b': 135}, 'hi': {'L': 207, 'a': 125, 'b': 147}, 'percentiles_ab': (1.0, 99.0), 'percentiles_L': (1.0, 99.0), 'k_std_ab': 2.5, 'k_std_L': 2.0}
Mask saved: ./out-poly/poly_mask.png  (on-pixels=6388617, fraction=0.049513)
Overlay saved: ./out-poly/poly_overlay.png
