In [None]:
# Caspase/DAPI overlay code

import pandas as pd
import numpy as np
from sklearn.neighbors import KDTree
from google.colab import files
import io, sys

# ---------- small helper ----------
def prompt_yn(msg="Continue? [y/n]: "):
    """Ask a yes/no question on the console until a valid answer is given.

    The reply is stripped and lower-cased before it is checked, so " Y "
    counts as yes.

    Args:
        msg: Prompt text shown on every attempt.

    Returns:
        True when the user answered "y", False when the user answered "n".
    """
    answer = input(msg).strip().lower()
    while answer not in ("y", "n"):
        print("Please type 'y' or 'n'.")
        answer = input(msg).strip().lower()
    return answer == "y"

# ---------- 1) Upload CSV ----------
print("Upload your QuPath measurements CSV (image filenames + centroid coordinates).")
up = files.upload()
if not up:
    raise SystemExit("No file uploaded.")
# Only the first uploaded entry (name -> raw bytes) is analysed.
csv_name, csv_bytes = next(iter(up.items()))

# ---------- 2) Settings ----------
# Overlap tolerance (µm) — change as needed.
TOL_UM = 5.0

# ---------- 3) Read CSV (sniff delimiter) ----------
def read_qupath_csv(bytes_obj):
    """Parse an uploaded measurements table from raw bytes.

    The bytes are decoded once, then parsed. The expected headers contain
    'µ', which is the single byte 0xB5 in Windows-1252/Latin-1 and is not
    valid UTF-8 on its own; without the encoding fallback such a file fails
    with UnicodeDecodeError before any parsing happens.

    Args:
        bytes_obj: Raw file content as ``bytes``.

    Returns:
        pandas.DataFrame with the parsed table.

    Raises:
        Whatever ``pandas.read_csv`` raises when the plain (comma) parse
        fails as well, e.g. ``pandas.errors.EmptyDataError``.
    """
    # "utf-8-sig" also strips a leading BOM; "latin-1" maps every byte, so
    # the loop always ends with ``text`` bound.
    for encoding in ("utf-8-sig", "cp1252", "latin-1"):
        try:
            text = bytes_obj.decode(encoding)
        except UnicodeDecodeError:
            continue
        break

    try:
        # sep=None lets the python engine sniff the delimiter (tab, comma, ...).
        return pd.read_csv(io.StringIO(text), sep=None, engine="python")
    except Exception:
        # Deliberate best-effort: if sniffing/parsing fails for any reason,
        # retry with pandas' defaults and let that attempt raise if it must.
        return pd.read_csv(io.StringIO(text))

df = read_qupath_csv(csv_bytes)

# Filenames come from the 'Image' column when it exists, otherwise from the
# first column of the table.
if "Image" in df.columns:
    image_col = "Image"
else:
    image_col = df.columns[0]

# Trimmed, casefolded copy of the filename used for channel detection.
df["__Image__"] = (
    df[image_col]
    .astype(str)
    .str.strip()
    .str.casefold()
)

# ---------- 4) Find centroid columns ----------
def find_xy_cols(df):
    """Locate the centroid X and Y columns of a measurements table.

    Headers are compared after trimming and lower-casing. The micro sign
    (U+00B5), the Greek letter mu (U+03BC) and a plain "u" are treated as
    the same unit prefix, so "Centroid X µm", "Centroid X μm" and
    "Centroid X um" all match. Candidates are tried in this order:
    "centroid <axis> um", "centroid <axis> [um]", "centroid <axis> (um)",
    "<axis>m", "<axis>".

    Args:
        df: DataFrame whose column labels are searched.

    Returns:
        Tuple ``(x, y)`` holding the original (un-normalised) column labels.

    Raises:
        ValueError: if the X or the Y column cannot be found.
    """
    def _normalise(label):
        text = str(label).strip().lower()
        # Fold both Unicode spellings of the micro prefix onto ASCII "u".
        return text.replace("\u00b5", "u").replace("\u03bc", "u")

    # normalised header -> original label
    by_key = {_normalise(c): c for c in df.columns}
    templates = (
        "centroid {} um",
        "centroid {} [um]",
        "centroid {} (um)",
        "{}m",
        "{}",
    )

    def _first_match(axis):
        for template in templates:
            key = template.format(axis)
            if key in by_key:
                return by_key[key]
        return None

    x = _first_match("x")
    y = _first_match("y")
    if x is None or y is None:
        raise ValueError("Couldn't find centroid columns (expected e.g. 'Centroid X µm' / 'Centroid Y µm').")
    return x, y

xcol, ycol = find_xy_cols(df)
df = df.rename(columns={xcol: "X", ycol: "Y"})

# ---------- 5) STRICT channel mapping by filename ending ONLY ----------
# "__Image__" is the casefolded filename, so the lowercase suffixes below
# match regardless of the capitalisation used in the CSV.
img = df["__Image__"]
is_dapi  = img.str.endswith("-dapi.ndpi", na=False)
is_casp3 = img.str.endswith("-fitc.ndpi", na=False)  # FITC channel holds Caspase-3

# Rows whose filename matches neither suffix keep Channel = None and are ignored.
df["Channel"] = None
df.loc[is_dapi,  "Channel"] = "DAPI"
df.loc[is_casp3, "Channel"] = "CASP3"

# A row with a missing X or Y would otherwise reach the nearest-neighbour
# search as NaN; leave such rows out and report how many were skipped.
has_xy = df[["X", "Y"]].notna().all(axis=1)
n_missing_xy = int((df["Channel"].notna() & ~has_xy).sum())
if n_missing_xy:
    print(f"Warning: skipping {n_missing_xy} DAPI/CASP3 row(s) with missing centroid coordinates.")

DAPI  = df.loc[has_xy & (df["Channel"] == "DAPI"),  ["X", "Y"]].reset_index(drop=True)
CASP3 = df.loc[has_xy & (df["Channel"] == "CASP3"), ["X", "Y"]].reset_index(drop=True)

# ---------- 6) Overlap finder (DAPI → nearest CASP3 within TOL_UM) ----------
def pair_to_dapi(dapi_xy: np.ndarray, mark_xy: np.ndarray, tol: float):
    """Pair every DAPI point with its nearest marker point within ``tol``.

    Each DAPI point is matched independently, so one marker point may
    appear in several rows.

    Args:
        dapi_xy: Array indexed as ``[:, 0]`` = X and ``[:, 1]`` = Y.
        mark_xy: Marker coordinates in the same layout.
        tol: Maximum allowed distance (inclusive) for a pair to be kept.

    Returns:
        DataFrame with columns X_DAPI, Y_DAPI, X_CASP3, Y_CASP3, dist_um,
        one row per DAPI point whose nearest marker is within ``tol``.
        The frame is empty when either input has no points.
    """
    columns = ["X_DAPI","Y_DAPI","X_CASP3","Y_CASP3","dist_um"]
    if not (len(dapi_xy) and len(mark_xy)):
        return pd.DataFrame(columns=columns)

    # query() returns (n, 1) arrays for k=1; flatten to one value per DAPI point.
    nearest_dist, nearest_idx = KDTree(mark_xy, leaf_size=40).query(dapi_xy, k=1)
    nearest_dist = nearest_dist.ravel()
    nearest_idx = nearest_idx.ravel()

    partners = mark_xy[nearest_idx]
    table = pd.DataFrame({
        "X_DAPI": dapi_xy[:, 0],
        "Y_DAPI": dapi_xy[:, 1],
        "X_CASP3": partners[:, 0],
        "Y_CASP3": partners[:, 1],
        "dist_um": nearest_dist,
    })
    within_tol = nearest_dist <= tol
    return table[within_tol].reset_index(drop=True)

# Plain (n, 2) coordinate arrays for the nearest-neighbour search.
# NOTE(review): DAPI and CASP3 rows from every image in the CSV are pooled
# here, so detections from different slides could be paired with each other
# — confirm the CSV holds a single slide (one DAPI file + one FITC file).
dapi_arr = DAPI.loc[:, ["X", "Y"]].to_numpy()
casp3_arr = CASP3.loc[:, ["X", "Y"]].to_numpy()

pairs_dapi_casp3 = pair_to_dapi(dapi_arr, casp3_arr, tol=TOL_UM)

# ---------- 7) Numeric QC (no plots) ----------
def rounded_dapi_set(pairs_df):
    """Return the set of "x,y" keys for the DAPI side of a pairs table.

    Coordinates are rounded to 4 decimals and rendered as strings, joined
    by a comma.

    Args:
        pairs_df: DataFrame with "X_DAPI" and "Y_DAPI" columns.

    Returns:
        Set of key strings; empty when ``pairs_df`` has no rows.
    """
    if pairs_df.empty:
        return set()
    x_text = pairs_df["X_DAPI"].round(4).astype(str)
    y_text = pairs_df["Y_DAPI"].round(4).astype(str)
    return {x + "," + y for x, y in zip(x_text, y_text)}

# Build a rounded "x,y" key for *all* DAPI so the unpaired ones can be found
# by comparing against the keys of the paired DAPI.
if not DAPI.empty:
    DAPI["_round_key"] = DAPI["X"].round(4).astype(str) + "," + DAPI["Y"].round(4).astype(str)
else:
    DAPI["_round_key"] = pd.Series(dtype=str)

dapi_in_casp3 = rounded_dapi_set(pairs_dapi_casp3)

# DAPI-ONLY (no CASP3 within tolerance)
is_dapi_only = ~DAPI["_round_key"].isin(dapi_in_casp3) if not DAPI.empty else pd.Series([], dtype=bool)
DAPI_ONLY = DAPI.loc[is_dapi_only, ["X","Y"]].reset_index(drop=True)

# Counts are computed once and reused for the console summary and the CSV.
n_pairs = len(pairs_dapi_casp3)
n_dapi_only = len(DAPI_ONLY)
# The pairs table has one row per DAPI, and several DAPI can share the same
# nearest CASP3. Subtracting the row count from len(CASP3) therefore
# understates CASP3-only (and can go negative); count each matched CASP3
# position once instead.
n_casp3_matched = len(pairs_dapi_casp3.drop_duplicates(subset=["X_CASP3", "Y_CASP3"]))
n_casp3_only = len(CASP3) - n_casp3_matched

print("\nQC summary (no plots):")
print(f"  N_DAPI:                 {len(DAPI)}")
print(f"  N_CASP3 (FITC channel): {len(CASP3)}")
print(f"  N_DAPI+CASP3 (≤{TOL_UM} µm): {n_pairs}")
print(f"  N_DAPI_ONLY:            {n_dapi_only}")
print(f"  N_CASP3_ONLY:           {n_casp3_only}")

if not prompt_yn("Continue with save & download? [y/n]: "):
    raise SystemExit("Aborted by user after numeric QC.")

# ---------- 8) Build ONE CSV (pairs + DAPI-ONLY + SUMMARY) ----------
# The three sections have different columns; concat aligns them by name and
# leaves the cells a section does not use empty.
rows = []
if not pairs_dapi_casp3.empty:
    a = pairs_dapi_casp3.copy()
    a.insert(0, "RowType", "PAIR_DAPI_CASP3")
    rows.append(a)

if len(DAPI_ONLY):
    b = DAPI_ONLY.copy()
    b = b.rename(columns={"X":"X_DAPI","Y":"Y_DAPI"})
    b["X_CASP3"] = np.nan
    b["Y_CASP3"] = np.nan
    b["dist_um"] = np.nan
    b.insert(0, "RowType", "DAPI_ONLY")
    rows.append(b)

summary = pd.DataFrame({
    "RowType": ["SUMMARY"]*6,
    "Metric": [
        "N_DAPI","N_CASP3",
        "N_DAPI_plus_CASP3","N_ONLY_DAPI","N_ONLY_CASP3",
        "TOL_UM"
    ],
    "Value": [
        len(DAPI), len(CASP3),
        n_pairs, n_dapi_only, n_casp3_only,
        TOL_UM
    ]
})
rows.append(summary)

# rows always contains at least the summary, so concat never sees an empty list.
out = pd.concat(rows, ignore_index=True)
out_name = "overlap_counts_dapi_casp3.csv"
out.to_csv(out_name, index=False)
files.download(out_name)
print(f"Done. Downloaded: {out_name}")