In [2]:
# %% [Cell 1] MIP selection in Jupyter - one CSV (background-final.csv) in the current directory
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Hamid
#
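# This cell:
# - Reads one CSV file (background-final.csv) with columns [time_ns, chan, ph]
# - Builds ADC histograms per channel and selects a MIP window around the muon peak
#   using local minima and fractional-height crossings of the smoothed spectrum
# - Saves per-channel plots (PNG + PDF), a combined multipage PDF, and a summary CSV
#   to ./valley_pngs_fast
# - Note: publishing files (LICENSE, README.md, CITATION.cff) are not written by the code below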

# %% Robust MIP + Valley for ONE CSV (background-final.csv) in CURRENT DIRECTORY
# Generates PNGs + per-figure PDFs, and one combined multipage PDF

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from scipy.signal import savgol_filter
from matplotlib import patches as mpatches
from IPython.display import display
from matplotlib.backends.backend_pdf import PdfPages

# Global matplotlib defaults for every figure created after this line:
# font family "Times New Roman" and a figure resolution of 140 dpi.
# NOTE(review): the font presumably has to be installed on the machine for
# matplotlib to use it - confirm on the target system.
plt.rcParams.update({"font.family": "Times New Roman", "figure.dpi": 140})

# ---------- unified text size control ----------
# Font sizes (in points) shared by every plot: title, axis labels, tick labels, legend.
TEXT_CFG = {"title": 18, "label": 18, "tick": 18, "legend": 18}

def apply_text_sizes(ax, cfg=TEXT_CFG, legend_obj=None):
    """Apply the font sizes in ``cfg`` to the text elements of ``ax``.

    Parameters
    ----------
    ax : axes-like object
        Must expose ``title``, ``xaxis.label``, ``yaxis.label`` and ``tick_params``.
    cfg : mapping
        Sizes keyed by ``"title"``, ``"label"``, ``"tick"`` and ``"legend"``.
    legend_obj : legend-like object, optional
        When given, every text returned by ``get_texts()`` is resized too.
    """
    ax.title.set_fontsize(cfg["title"])
    for axis in (ax.xaxis, ax.yaxis):
        axis.label.set_size(cfg["label"])
    ax.tick_params(axis="both", labelsize=cfg["tick"])

    if legend_obj is None:
        return
    for entry in legend_obj.get_texts():
        entry.set_fontsize(cfg["legend"])

# ---- Input: exactly one CSV, resolved against the CURRENT WORKING DIRECTORY ----
# (Path.resolve() makes the relative name absolute using the process's cwd.)
IN_FILE = Path("background-final.csv").resolve()

# ---- Output folder ----
# Created at import/run time if it does not exist yet (including parents).
OUT_DIR = Path("./valley_pngs_fast").resolve()
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Combined multipage PDF
ALL_PDF_PATH = OUT_DIR / "valley_plots_all.pdf"

# Values of the "chan" column that are processed, one spectrum per entry.
CHANNELS = [1, 2, 3]

# ---- Histogram / search config ----
# Integer bin edges 0, 1, ..., 1024 -> 1024 bins of width 1.
PH_EDGES = np.arange(0, 1025, 1)
# Midpoint of every bin (0.5, 1.5, ..., 1023.5); used as the x axis.
BIN_CENTERS = 0.5 * (PH_EDGES[:-1] + PH_EDGES[1:])
# (low, high) pulse-height range in which the peak is searched.
MUON_RANGE = (170, 360)

# --- Smoothing ---
# Savitzky-Golay window length (sg_smooth forces it to be odd) and polynomial order.
SG_WIN = 21
SG_POLY = 3

# --- Valley controls (hybrid) ---
# All of the values below are consumed by valley_bounds_hybrid and are in bins
# unless stated otherwise.
GUARD_BINS = 12        # local minima closer than this to the peak are not considered
MIN_WIDTH = 40         # minimum distance enforced between the left and right bound
LEFT_FRAC = 0.20       # left threshold = bg + LEFT_FRAC * (peak - bg)
RIGHT_FRAC = 0.12      # right threshold = bg + RIGHT_FRAC * (peak - bg)
SAFE_MARGIN_L = 6      # extra bins added outward on the left bound
SAFE_MARGIN_R = 6      # extra bins added outward on the right bound
SPAN_LEFT_MAX = 240    # upper clip for the left search span
SPAN_RIGHT_MAX = 260   # upper clip for the right search span

# Line color of the raw histogram in the plots.
DATA_COLOR = "#d62728"

# ---------- helpers ----------
def file_exists(p: Path) -> bool:
    """Return whether ``p`` exists, printing a warning when it does not."""
    found = p.exists()
    if not found:
        print(f"[WARN] Missing file: {p}")
    return found

def load_threecol_csv(path: Path):
    """Load a header-less three-column CSV as (time_ns, chan, ph).

    Every column is coerced to numeric; rows in which any of the three values
    fails to parse (for example a textual header row) are dropped. The ``chan``
    column is then cast to ``int``.
    """
    columns = ["time_ns", "chan", "ph"]
    raw = pd.read_csv(path, header=None, names=columns)

    # Unparseable entries become NaN so they can be dropped in one pass below.
    for col in columns:
        raw[col] = pd.to_numeric(raw[col], errors="coerce")

    cleaned = raw.dropna(subset=columns).copy()
    cleaned["chan"] = cleaned["chan"].astype(int)
    return cleaned

def hist_channel(ph, edges):
    """Histogram ``ph`` into the bins defined by ``edges`` and return float counts."""
    counts = np.histogram(ph, bins=edges)[0]
    return counts.astype(float)

def sg_smooth(y, win=SG_WIN, poly=SG_POLY):
    """Smooth ``y`` with a Savitzky-Golay filter, shrinking the window for short input.

    Parameters
    ----------
    y : array with a ``copy()`` method
        Samples to smooth.
    win : int
        Requested window length; forced odd by setting the lowest bit, so an
        even value is rounded up by one.
    poly : int
        Polynomial order, capped at ``window - 1``.

    Returns
    -------
    The filtered array (``savgol_filter`` with ``mode="interp"``), or an
    unsmoothed copy of ``y`` when it has fewer than 5 samples or no odd window
    of at least 5 samples fits strictly inside it.
    """
    n = len(y)
    w = int(win) | 1
    if n < 5:
        return y.copy()
    if w > n - 1:
        # Largest odd length <= n - 1. The earlier formula
        # ((n - 1) // 2) * 2 + 1 evaluates to n when n is odd, which the guard
        # below rejects, so odd-length short inputs were returned unsmoothed.
        largest_odd = (n - 1) if (n - 1) % 2 else n - 2
        w = max(5, largest_odd)
    if w < 5 or w >= n:
        return y.copy()
    return savgol_filter(y, window_length=w, polyorder=min(poly, w - 1), mode="interp")

def estimate_bg_local(y):
    """Estimate a background level for ``y``.

    For 20 or more samples this is the median of the values at or below the
    20 % quantile; shorter inputs fall back to the plain median.
    """
    if y.size >= 20:
        cutoff = np.quantile(y, 0.2)
        y = y[y <= cutoff]
    return float(np.median(y))

def pick_muon_peak_strict(counts, x, muon_range, win=SG_WIN, poly=SG_POLY):
    """Locate the peak of ``counts`` inside ``muon_range``.

    The smoothed spectrum is maximised over the bins whose ``x`` lies within
    ``muon_range`` (inclusive); the result is then refined to the highest raw
    count within 25 bins on either side. If no bin falls inside the range, the
    global maximum of the smoothed spectrum is returned instead.

    Returns
    -------
    (index, smoothed) : tuple
        Peak index into ``counts`` and the smoothed spectrum.
    """
    smoothed = sg_smooth(counts, win, poly)
    lo_x, hi_x = muon_range[0], muon_range[1]
    in_range = (x >= lo_x) & (x <= hi_x)

    if not np.any(in_range):
        return int(np.argmax(smoothed)), smoothed

    # Exclude everything outside the range from the arg-max search.
    masked = smoothed.copy()
    masked[~in_range] = -np.inf
    coarse = int(np.nanargmax(masked))

    # Refine on the raw counts in a +/-25-bin neighbourhood of the coarse peak.
    start = max(0, coarse - 25)
    stop = min(len(counts) - 1, coarse + 25)
    refined = start + int(np.argmax(counts[start:stop + 1]))
    return refined, smoothed

def valley_bounds_hybrid(counts, x, i0):
    """Choose left/right bin indices bounding the peak located at index ``i0``.

    Several candidate bounds are computed on the smoothed spectrum on each side
    of the peak (local minima, fractional-height threshold crossings and, on the
    left only, the largest second derivative). The outermost candidate on each
    side is taken, widened by a safety margin, and clipped.

    Parameters
    ----------
    counts : array
        Raw histogram counts.
    x : array
        Bin positions matching ``counts``; used with ``np.searchsorted``, so it
        is assumed to be sorted ascending - TODO confirm for other callers.
    i0 : int
        Index of the peak.

    Returns
    -------
    (iL, iR, sm) : tuple
        Left and right bound indices (``iR`` is at least ``iL + 1``) and the
        smoothed spectrum.
    """
    sm = sg_smooth(counts, SG_WIN, SG_POLY)
    n = len(sm)

    # Numerical first and second derivative of the smoothed spectrum.
    d1 = np.gradient(sm)
    d2 = np.gradient(d1)
    # Indices of strict interior local minima of the smoothed spectrum (ascending).
    mins = np.where((sm[1:-1] < sm[:-2]) & (sm[1:-1] < sm[2:]))[0] + 1

    # Index range of MUON_RANGE in x, then search spans proportional to the
    # distance from the peak to each range edge, clipped to fixed limits.
    i_mu_min = int(np.searchsorted(x, MUON_RANGE[0], side="left"))
    i_mu_max = int(np.searchsorted(x, MUON_RANGE[1], side="right")) - 1
    spanL = int(np.clip(0.8 * (i0 - i_mu_min), 80, SPAN_LEFT_MAX))
    spanR = int(np.clip(0.9 * (i_mu_max - i0), 100, SPAN_RIGHT_MAX))
    # Search window around the peak, kept inside the array.
    Lwin, Rwin = max(0, i0 - spanL), min(n - 1, i0 + spanR)

    # Background level inside the window and fractional-height thresholds above
    # it; the peak-over-background height is floored at 1.0.
    b = estimate_bg_local(sm[Lwin:Rwin + 1])
    pk = float(sm[i0])
    thrL = b + LEFT_FRAC * max(pk - b, 1.0)
    thrR = b + RIGHT_FRAC * max(pk - b, 1.0)

    # ----- left side -----
    # left_lo equals Lwin (Lwin is already >= i0 - spanL); left_hi stays at
    # least GUARD_BINS away from the peak but never below left_lo.
    left_lo = max(Lwin, i0 - spanL)
    left_hi = max(left_lo, i0 - GUARD_BINS)

    # Candidate 1: the deepest local minimum in [left_lo, left_hi].
    left_cands = mins[(mins >= left_lo) & (mins <= left_hi)]
    iL_min = int(left_cands[np.argmin(sm[left_cands])]) if left_cands.size else None

    # Candidate 2: the index nearest the peak (last in the segment) at or below thrL.
    segL = sm[left_lo:i0 + 1]
    idxL = np.where(segL <= thrL)[0]
    iL_frac = int(left_lo + idxL[-1]) if idxL.size else None

    # Candidate 3: position of the largest second derivative in [left_lo, infl_stop).
    infl_stop = max(left_lo, i0 - GUARD_BINS)
    iL_infl = (int(np.argmax(d2[left_lo:infl_stop])) + left_lo) if infl_stop > left_lo else None

    # Lowest x value the left bound may reach, converted to an index.
    muon_floor_adc = max(MUON_RANGE[0] - 10, x[i0] - 180)
    floor_idx = int(np.searchsorted(x, muon_floor_adc, side="left"))

    # Take the leftmost candidate (or a fallback 3 smoothing windows left of the
    # peak), widen by the safety margin without passing the floor, then clip so
    # that at least one bin remains to the right.
    left_candidates = [c for c in (iL_min, iL_frac, iL_infl) if c is not None]
    iL = min(left_candidates) if left_candidates else max(floor_idx, i0 - 3 * SG_WIN)
    iL = max(iL - SAFE_MARGIN_L, floor_idx)
    iL = int(np.clip(iL, 0, n - 2))

    # ----- right side -----
    right_lo = min(i0 + GUARD_BINS, Rwin)
    right_hi = Rwin

    # Candidate 1: the first (nearest to the peak) local minimum in [right_lo, right_hi].
    right_cands = mins[(mins >= right_lo) & (mins <= right_hi)]
    iR_min = int(right_cands[0]) if right_cands.size else None

    # Candidate 2: the first index at or after the peak that is at or below thrR.
    segR = sm[i0:right_hi + 1]
    idxR = np.where(segR <= thrR)[0]
    iR_frac = int(i0 + idxR[0]) if idxR.size else None

    # Take the rightmost candidate (or a fallback 3 smoothing windows right of
    # the peak), widen by the safety margin, enforce MIN_WIDTH relative to iL,
    # and keep the result inside (iL, n - 1].
    right_candidates = [c for c in (iR_min, iR_frac) if c is not None]
    iR = max(right_candidates) if right_candidates else min(n - 1, i0 + 3 * SG_WIN)
    iR = min(iR + SAFE_MARGIN_R, n - 1)
    iR = int(np.clip(max(iR, iL + MIN_WIDTH), iL + 1, n - 1))

    return iL, iR, sm

def plot_and_save_linear(title, x, counts, sm, iL, iR, color, outfile, label, pdf_pages):
    """Plot one spectrum with its selected window on a linear scale and save it.

    The raw counts and the smoothed curve are drawn against ``x``, the window
    ``[x[iL], x[iR]]`` is shaded, and the figure is written into ``OUT_DIR``
    under ``outfile`` and again under the same name with a ``.pdf`` suffix.

    Parameters
    ----------
    title : str
        Plot title; any em dash is replaced by a hyphen and " (Linear)" is appended.
    x, counts, sm : arrays
        Bin positions, raw counts and smoothed counts.
    iL, iR : int
        Indices into ``x`` of the window bounds.
    color : matplotlib color
        Line color for the raw counts.
    outfile : str
        File name (inside ``OUT_DIR``) of the first saved image.
    label : str
        Legend label for the raw counts.
    pdf_pages : PdfPages or None
        When not ``None``, the figure is also appended as a page.
    """
    window_fill = (0.63, 0.13, 0.94, 0.22)
    window_edge = (0.63, 0.13, 0.94, 0.9)

    fig, ax = plt.subplots(figsize=(9.8, 5.6))
    # try/finally so the figure is released even when saving fails; otherwise
    # failed calls would leave open figures behind in the session.
    try:
        ax.plot(x, counts, color=color, lw=1.8, label=label)
        ax.plot(x, sm, color="black", lw=2.0, label="Smoothed (SG)")

        x0, x1 = float(x[iL]), float(x[iR])
        ax.axvspan(x0, x1, color=window_fill, lw=0)
        for bound in (x0, x1):
            ax.axvline(bound, color=window_edge, lw=1.0)

        # axvspan carries no legend label, so a proxy patch is added by hand.
        valley_label = f"Valley [{int(round(x0))}, {int(round(x1))}]"
        valley_patch = mpatches.Patch(
            facecolor=window_fill,
            edgecolor=window_edge,
            label=valley_label,
        )

        handles, labels_ = ax.get_legend_handles_labels()
        handles.append(valley_patch)
        labels_.append(valley_label)
        leg = ax.legend(handles, labels_, frameon=True)

        raw_max = np.nanmax(counts)
        ymax = max(raw_max, np.nanmax(sm)) if np.isfinite(raw_max) else 1.0
        if not ymax > 0:
            # An all-zero (or NaN) spectrum would give a zero-height y-range.
            ymax = 1.0
        ax.set_ylim(0, ymax * 1.12)
        ax.set_xlim(0, 1024)
        ax.set_xlabel("Pulse Height (ADC)")
        ax.set_ylabel("Counts")
        ax.set_title(title.replace("—", "-") + " (Linear)")
        ax.grid(True, alpha=0.3)

        apply_text_sizes(ax, TEXT_CFG, legend_obj=leg)

        fig.tight_layout()

        fig.savefig(OUT_DIR / outfile, bbox_inches="tight")
        pdf_name = Path(outfile).with_suffix(".pdf").name
        fig.savefig(OUT_DIR / pdf_name, bbox_inches="tight")

        if pdf_pages is not None:
            pdf_pages.savefig(fig, bbox_inches="tight")
    finally:
        plt.close(fig)

# ---------- run ----------
# Processes every channel in CHANNELS: histogram -> peak -> window bounds ->
# plot, then writes one summary row per channel to a CSV in OUT_DIR.
print("Current working directory:", Path(".").resolve())
print("Input file:", IN_FILE)

if not file_exists(IN_FILE):
    raise FileNotFoundError(f"Could not find {IN_FILE} in the current directory.")

df = load_threecol_csv(IN_FILE)

summary_rows = []

# The context manager finalizes the combined PDF even if a channel fails
# part-way through, instead of leaving the file handle open.
with PdfPages(ALL_PDF_PATH) as pdf_pages:
    for ch in CHANNELS:
        ph = df.loc[df["chan"] == ch, "ph"].to_numpy()

        # A channel without any rows still gets a (NaN) summary row but no plot.
        if ph.size == 0:
            summary_rows.append(
                dict(file=IN_FILE.name, channel=ch, min_adc=np.nan, max_adc=np.nan, peak_adc=np.nan)
            )
            continue

        counts = hist_channel(ph, PH_EDGES)
        i0, _ = pick_muon_peak_strict(counts, BIN_CENTERS, MUON_RANGE)
        iL, iR, sm = valley_bounds_hybrid(counts, BIN_CENTERS, i0)

        # int() truncates the half-integer bin centers to the bin's lower edge.
        min_adc = int(BIN_CENTERS[iL])
        max_adc = int(BIN_CENTERS[iR])
        peak_adc = int(BIN_CENTERS[i0])

        summary_rows.append(
            dict(file=IN_FILE.name, channel=ch, min_adc=min_adc, max_adc=max_adc, peak_adc=peak_adc)
        )

        title = f"Pulse-Height - Background • Channel {ch}"
        stub = f"Background__Ch{ch}_lin.png"
        plot_and_save_linear(title, BIN_CENTERS, counts, sm, iL, iR, DATA_COLOR, stub, "Data", pdf_pages)

valley_df = pd.DataFrame(summary_rows)
csv_path = OUT_DIR / "valley_thresholds_summary.csv"
valley_df.to_csv(csv_path, index=False)

print(f"Saved {len(summary_rows)} rows to {csv_path}")
print(f"Combined multi-page PDF saved to: {ALL_PDF_PATH}")
display(valley_df)



Current working directory: /Users/basiri/Codes/GitHub/MIPselection
Input file: /Users/basiri/Codes/GitHub/MIPselection/background-final.csv
Saved 3 rows to /Users/basiri/Codes/GitHub/MIPselection/valley_pngs_fast/valley_thresholds_summary.csv
Combined multi-page PDF saved to: /Users/basiri/Codes/GitHub/MIPselection/valley_pngs_fast/valley_plots_all.pdf


Unnamed: 0,file,channel,min_adc,max_adc,peak_adc
0,background-final.csv,1,211,324,249
1,background-final.csv,2,197,328,242
2,background-final.csv,3,189,295,228
