# In [1]:
# ============================================================================

# PART 1: CLEAN FITTING FUNCTIONS (taken from the user's working code)

# Copy this entire file content FIRST

# ============================================================================




import re

import numpy as np

import io

import pandas as pd




# SciPy is optional: _HAS_SCIPY records whether curve_fit is available so
# that fit_sigmoid_refined can fall back to the NumPy-only fit_sigmoid.
try:
    from scipy.optimize import curve_fit
    _HAS_SCIPY = True
except ImportError:
    # Only a missing/unimportable SciPy should disable the refined fit; a bare
    # ``except`` would also swallow KeyboardInterrupt and unrelated errors.
    _HAS_SCIPY = False





# Constants
# Display label for efficiency values in percent. It is not referenced in the
# visible part of this file -- confirm its consumers before renaming it.
EFFICIENCY_LABEL = 'Efficiency (%)'


def detect_best_separator(sample_text, max_probe_lines=30):
    """Guess the column separator of a two-column numeric text table.

    Every candidate separator is scored by the number of probed lines whose
    first two fields both parse as finite floats. The highest score wins;
    ties go to the candidate that appears first in the list below.

    Args:
        sample_text: Raw text of the file (or a sample of it).
        max_probe_lines: Maximum number of non-blank lines to inspect. A
            falsy value (``0`` or ``None``) inspects every line.

    Returns:
        ``(separator, use_decimal_comma)``. ``separator`` is one of ``','``,
        ``';'``, a tab, ``'|'``, ``':'`` or the literal string ``'SPACE'``
        (meaning "split on runs of whitespace"). ``use_decimal_comma`` is
        True when at least three probed lines contain a ``digit,digit``
        pattern and the chosen separator is not the comma itself.
    """
    candidates = [',', ';', '\t', '|', ':', 'SPACE']
    lines = [ln.strip() for ln in sample_text.splitlines() if ln.strip()]
    if max_probe_lines:
        lines = lines[:max_probe_lines]
    deccomma_hits = sum(bool(re.search(r'\d,\d', ln)) for ln in lines)

    def try_candidate(sep):
        """Count the lines whose first two fields are finite floats under ``sep``."""
        ok = 0
        for ln in lines:
            parts = re.split(r'\s+', ln) if sep == 'SPACE' else ln.split(sep)
            if len(parts) < 2:
                continue
            p0, p1 = parts[0], parts[1]
            if sep != ',':
                # When the comma is not the separator it may be a decimal mark.
                p0 = p0.replace(',', '.')
                p1 = p1.replace(',', '.')
            try:
                x, y = float(p0), float(p1)
            except ValueError:
                # Non-numeric field (header, text): this line does not score.
                continue
            if np.isfinite(x) and np.isfinite(y):
                ok += 1
        return ok

    scores = {sep: try_candidate(sep) for sep in candidates}
    # max() returns the first maximal element, so list order breaks ties.
    best = max(candidates, key=lambda s: scores[s])
    use_decimal_comma = (deccomma_hits >= 3 and best != ',')
    return best, use_decimal_comma




def read_two_cols_anysep(csv_text, has_header=True):
    """Parse the first two numeric columns of a delimited text table.

    The separator and decimal-comma convention are auto-detected with
    ``detect_best_separator``. Blank lines, lines with fewer than two fields,
    and lines whose first two fields are not finite floats (for example a
    header row) are skipped.

    Args:
        csv_text: Full text of the file.
        has_header: Kept for backward compatibility. A non-numeric first row
            is skipped whether or not this flag is set, and a numeric first
            row is always kept, so the flag does not change the result.

    Returns:
        ``(x, y)`` float arrays of equal length, sorted by ascending ``x``.
    """
    sep, deccomma = detect_best_separator(csv_text)

    def _to_float(token):
        """Convert one field, ignoring inner spaces and honouring decimal commas."""
        token = token.replace(' ', '')
        if deccomma:
            token = token.replace(',', '.')
        return float(token)

    xs, ys = [], []
    for line in csv_text.splitlines():
        line = line.strip()
        if not line:
            continue
        parts = re.split(r'\s+', line) if sep == 'SPACE' else line.split(sep)
        if len(parts) < 2:
            continue
        try:
            xval, yval = _to_float(parts[0]), _to_float(parts[1])
        except ValueError:
            # Header or otherwise non-numeric row.
            continue
        if np.isfinite(xval) and np.isfinite(yval):
            xs.append(xval)
            ys.append(yval)

    x_arr = np.asarray(xs, dtype=float)
    y_arr = np.asarray(ys, dtype=float)
    order = np.argsort(x_arr)
    return x_arr[order], y_arr[order]




def logistic_eval(p, x):
    """Evaluate the 4-parameter logistic ``a + b / (1 + exp(c * (x - d)))``.

    Args:
        p: Sequence ``(a, b, c, d)`` of exactly four parameters.
        x: Scalar or NumPy array of abscissae.

    Returns:
        The logistic value(s) at ``x``.
    """
    offset, amplitude, rate, midpoint = p
    growth = np.exp(rate * (x - midpoint))
    return offset + amplitude / (1.0 + growth)




def fit_sigmoid(x, y, max_iter=3000):
    """Fit the 4-parameter logistic by a derivative-free coordinate search.

    Starting from a percentile-based initial guess, each sweep tries moving
    every parameter by ``+step`` and then ``-step``. The first move that
    lowers the mean squared error is accepted; if neither direction helps,
    that parameter's step is shrunk by a factor 0.7. The search ends after
    ``max_iter`` sweeps, or earlier once a sweep brings no improvement and
    every step is below 1e-6.

    Args:
        x, y: NumPy arrays of sample coordinates.
        max_iter: Maximum number of sweeps over the four parameters.

    Returns:
        ``np.ndarray`` ``[a, b, c, d]`` as consumed by ``logistic_eval``.
    """
    x_low = np.min(x)
    x_span = max(1.0, np.max(x) - x_low)

    # Initial guess: 10th percentile as offset, 10-90 spread (at least 5) as
    # amplitude, a slope scaled to the x span, midpoint in the middle.
    offset0 = np.percentile(y, 10)
    amplitude0 = max(5.0, np.percentile(y, 90) - offset0)
    rate0 = 5.0 / x_span
    midpoint0 = x_low + 0.5 * x_span
    params = np.array([offset0, amplitude0, rate0, midpoint0], float)
    steps = np.array([1.0, 1.0, rate0, x_span * 0.1])

    def mse(candidate):
        residual = logistic_eval(candidate, x) - y
        return float(np.mean(residual ** 2))

    best_err = mse(params)
    for _sweep in range(max_iter):
        any_gain = False
        for axis in range(4):
            moved = False
            for direction in (+1, -1):
                candidate = params.copy()
                candidate[axis] += direction * steps[axis]
                cand_err = mse(candidate)
                if cand_err < best_err:
                    params, best_err = candidate, cand_err
                    moved = True
                    break
            if moved:
                any_gain = True
            else:
                # Neither direction helped: refine the step for this axis.
                steps[axis] *= 0.7
        if not any_gain and np.max(steps) < 1e-6:
            break
    return params




def _logistic_func(x, a, b, c, d):
    """Four-parameter logistic ``a + b / (1 + exp(c * (x - d)))``.

    Takes ``x`` first and the parameters as separate positional arguments,
    which is the form in which this file hands it to ``curve_fit``.
    """
    exponent = c * (x - d)
    denominator = 1.0 + np.exp(exponent)
    return a + b / denominator




def predict_sigmoid(params, xgrid):
    """Evaluate a fitted logistic on ``xgrid``.

    Args:
        params: Sequence ``(a, b, c, d)`` of exactly four parameters.
        xgrid: Array-like of abscissae; converted to a float array.

    Returns:
        Float array of logistic values, one per grid point.
    """
    offset, amplitude, rate, midpoint = params
    grid = np.asarray(xgrid, float)
    return _logistic_func(grid, offset, amplitude, rate, midpoint)




def fit_sigmoid_refined(x, y, use_weights=False, bins=30, density_bandwidth=0.10):
    """Fit the 4-parameter logistic with bounded least squares.

    Uses ``scipy.optimize.curve_fit`` when SciPy was importable, otherwise
    falls back to ``fit_sigmoid``.

    Args:
        x, y: Sample coordinates; converted to float arrays.
        use_weights: When True, each point gets ``sigma = 1 / sqrt(density)``
            where ``density`` is the histogram count of its x bin, so points
            in densely sampled regions weigh more in the fit.
        bins: Unused; kept so callers that pass it keep working.
        density_bandwidth: Controls the number of histogram bins:
            ``len(x) * max(0.02, density_bandwidth)`` clamped to ``[10, 120]``.

    Returns:
        ``np.ndarray`` ``[a, b, c, d]`` usable with ``predict_sigmoid``.

    Raises:
        Any exception raised by ``curve_fit`` propagates to the caller.
    """
    x = np.asarray(x, float)
    y = np.asarray(y, float)

    if not _HAS_SCIPY:
        return fit_sigmoid(x, y)

    x0, x1 = float(np.min(x)), float(np.max(x))
    xr = max(1.0, x1 - x0)
    y10, y90 = np.percentile(y, 10), np.percentile(y, 90)

    y_min, y_max = float(np.min(y)), float(np.max(y))
    span = max(1.0, y_max - y_min)
    lower = np.array([y_min - 0.2 * span, 0.0, -10.0 / xr, x0 - 0.2 * xr])
    upper = np.array([y_max + 0.2 * span, 4.0 * max(5.0, span), 10.0 / xr, x1 + 0.2 * xr])

    p0 = np.array([y10, max(5.0, y90 - y10), 5.0 / xr, x0 + 0.5 * xr])
    # xr is floored at 1.0, so for a narrow x range the midpoint guess
    # x0 + 0.5 * xr can exceed the upper bound x1 + 0.2 * xr; curve_fit then
    # rejects the start point as infeasible. Clip the guess into the box.
    p0 = np.clip(p0, lower, upper)

    sigma = None
    if use_weights:
        # Tie the slider to the data size: more points -> more bins.
        nb = max(10, min(120, int(len(x) * max(0.02, float(density_bandwidth)))))
        H, edges = np.histogram(x, bins=nb)
        idx = np.clip(np.digitize(x, edges) - 1, 0, len(H) - 1)
        dens = H[idx].astype(float)
        dens[dens < 1] = 1.0
        # Up-weight dense regions (smaller sigma means larger weight).
        sigma = 1.0 / np.sqrt(dens)

    popt, _ = curve_fit(_logistic_func, x, y, p0=p0, bounds=(lower, upper),
                        maxfev=20000, sigma=sigma)
    return np.asarray(popt, float)




def fit_polynomial(x, y, deg=3):
    """Least-squares fit a degree-``deg`` polynomial and evaluate it at ``x``.

    Returns:
        Array of fitted values, one per entry of ``x``.
    """
    coefficients = np.polyfit(x, y, deg)
    fitted = np.polyval(coefficients, x)
    return fitted




def fit_loess(x, y, frac=0.25):
    """Smooth ``y`` with an inverse-distance weighted moving average.

    For each sample, the neighbours inside an index window are averaged with
    weights ``1 / (|x_j - x_i| + 1e-9)``. The window holds
    ``max(5, int(n * frac))`` samples, rounded up to an odd count.

    Args:
        x, y: NumPy arrays of equal length.
        frac: Fraction of the samples used as window width.

    Returns:
        Float array of smoothed values at the original ``x`` positions.
    """
    count = len(x)
    window = max(5, int(count * frac))
    if window % 2 == 0:
        window += 1
    reach = window // 2

    smoothed = np.zeros(count)
    for pos in range(count):
        lo = max(0, pos - reach)
        hi = min(count, pos + reach + 1)
        neigh_x = x[lo:hi]
        neigh_y = y[lo:hi]
        weights = 1.0 / (np.abs(neigh_x - x[pos]) + 1e-9)
        smoothed[pos] = np.sum(weights * neigh_y) / np.sum(weights)
    return smoothed




def savgol_manual(y, window=11, poly=2):
    """Smooth ``y`` with a hand-built Savitzky-Golay style filter.

    A polynomial of degree ``poly`` is least-squares fitted over a sliding
    odd window and evaluated at the window centre. The signal is edge-padded
    so the output has the same length as the input.

    Args:
        y: 1-D NumPy array.
        window: Window length; even values are bumped to the next odd value
            and the result is capped at the largest odd number <= ``len(y)``.
        poly: Polynomial degree of the local fit.

    Returns:
        Smoothed array, or a copy of ``y`` when it has fewer than 3 samples
        or the effective window is below 3.
    """
    size = len(y)
    if size < 3:
        return y.copy()

    if window % 2 == 0:
        window += 1
    largest_odd = size if size % 2 == 1 else size - 1
    window = min(window, largest_odd)
    if window < 3:
        return y.copy()

    reach = window // 2
    padded = np.pad(y, (reach, reach), mode='edge')
    offsets = np.arange(-reach, reach + 1, 1, dtype=float)
    design = np.vstack([offsets ** k for k in range(poly + 1)]).T
    # Row 0 of the least-squares projector gives the fitted value at offset 0.
    projector = np.linalg.pinv(design.T @ design) @ design.T
    kernel = projector[0]
    return np.convolve(padded, kernel[::-1], 'valid')




def apply_anchors_and_monotone_soft(
    x, y, anchors=None, monotone='nonincreasing', alpha=0.6, win=21, x_tol_frac=0.03, *,
    force_exact=False
):
    """Pull a curve toward anchor points, then optionally enforce monotonicity.

    For every anchor ``(xa, ya)`` the curve is shifted locally by a Gaussian
    bump (peak normalised to 1) whose height moves the local value a fraction
    ``alpha`` of the way toward ``ya``. With ``force_exact`` the selected grid
    node is then set to exactly ``ya``. Finally a running min/max pass makes
    the curve non-increasing or non-decreasing if requested.

    Args:
        x: Grid abscissae, ascending.
        y: Curve values on ``x``; never modified in place.
        anchors: Iterable of ``(x, y)`` pairs, or None/empty for none.
        monotone: ``'nonincreasing'``, ``'nondecreasing'``; any other value
            disables the monotone pass.
        alpha: Strength of the soft pull, 0 (none) to 1 (full).
        win: Width of the correction window in grid nodes (made odd).
        x_tol_frac: Fraction of the x range within which the nearest grid
            node is treated as coinciding with the anchor.
        force_exact: Pin the node to the exact anchor value (keyword-only).

    Returns:
        New float array with the adjusted curve.

    Note:
        Pins are written after all soft corrections, so neither an anchor's
        own bump nor a neighbouring anchor's bump can offset them. The
        monotone pass runs afterwards and may still move a pinned node.
    """
    x = np.asarray(x, dtype=float)
    y_adj = np.array(y, dtype=float)  # float copy; the caller's array is untouched
    n = len(y_adj)

    if anchors is not None and len(anchors) > 0 and n > 0:
        if win % 2 == 0:
            win += 1
        half = win // 2
        x0, x1 = float(np.min(x)), float(np.max(x))
        xr = max(1.0, x1 - x0)
        x_tol = x_tol_frac * xr
        pins = []

        for xa, ya in anchors:
            idx = int(np.argmin(np.abs(x - xa)))
            if abs(x[idx] - xa) > x_tol:
                # Anchor falls between grid nodes: use the interpolated local
                # value and centre the kernel on the anchor itself.
                y_loc = float(np.interp(xa, x, y_adj))
                x_center = xa
                idx = max(0, min(n - 1, int(np.searchsorted(x, xa))))
            else:
                y_loc = y_adj[idx]
                x_center = x[idx]

            # Spread the correction locally (soft) to avoid cusps.
            y_target = (1.0 - alpha) * y_loc + alpha * ya
            delta = y_target - y_loc
            i0 = max(0, idx - half)
            i1 = min(n, idx + half + 1)
            xx = x[i0:i1]
            sigma = max(1e-9, 0.25 * (xx[-1] - xx[0])) if len(xx) >= 2 else 1.0
            w = np.exp(-0.5 * ((xx - x_center) / sigma) ** 2)
            w /= max(1e-9, np.max(w))   # kernel peak = 1
            y_adj[i0:i1] += delta * w   # centre node moves by ~delta, neighbours less

            if force_exact:
                pins.append((idx, ya))

        # Apply exact pins last; writing them before the bump (as the code
        # used to) left the node at ya + delta instead of ya.
        for idx, ya in pins:
            y_adj[idx] = ya

    if monotone == 'nonincreasing':
        for i in range(1, len(y_adj)):
            if y_adj[i] > y_adj[i - 1]:
                y_adj[i] = y_adj[i - 1]
    elif monotone == 'nondecreasing':
        for i in range(1, len(y_adj)):
            if y_adj[i] < y_adj[i - 1]:
                y_adj[i] = y_adj[i - 1]
    return y_adj




def parse_anchors(text):
    """Parse an anchor string such as ``"x1,y1; x2,y2"``.

    Pairs are separated by ``;`` and the two coordinates of a pair by ``,``.
    Entries that are empty, do not have exactly two fields, or contain a
    non-numeric field are skipped.

    Args:
        text: The raw string, or None.

    Returns:
        List of ``(x, y)`` float tuples in input order (empty for None or
        blank input).
    """
    if text is None:
        return []
    anchors = []
    for chunk in text.split(";"):
        chunk = chunk.strip()
        if not chunk:
            continue
        try:
            # ValueError covers both a bad float and a wrong field count.
            x, y = map(float, chunk.split(","))
        except ValueError:
            continue
        anchors.append((x, y))
    return anchors




def run_pipeline_from_arrays(
    x, y,
    method='sigmoid',
    poly_deg=3,
    loess_frac=0.25,
    anchors=None,
    start_flat_frac=0.0,
    start_flat_strength=0.35,
    sg_window=0,
    sg_poly=2,
    ma_frac=0.0,
    clip_min=0.0,
    clip_max=100.0,
    monotone='nonincreasing',
    alpha=0.6,
    force_exact=False,
    use_density_weights=True,
    density_bandwidth=0.1,
    n_grid=400
):
    """Fit or smooth ``y(x)`` and return the curve on a regular x grid.

    Steps: clean the input (truncate to a common length, drop non-finite
    pairs, sort by x, average y over duplicate x), build an ``n_grid``-point
    linear grid over the x range, evaluate the chosen method on it, apply the
    optional smoothing/plateau stage (non-parametric methods only), apply
    anchors and/or monotonicity, and clip to ``[clip_min, clip_max]``.

    Args:
        x, y: Array-likes of sample coordinates (None yields empty output).
        method: ``'sigmoid'``, ``'sigmoid_refined'``, ``'polynomial'``,
            ``'loess'`` or ``'raw'`` (case-insensitive; None means ``'raw'``).
        poly_deg: Degree for ``'polynomial'``.
        loess_frac: Window fraction for ``'loess'``.
        anchors: Optional list of ``(x, y)`` anchor points.
        start_flat_frac: Fraction of the x range blended toward a start
            plateau; 0 disables the blend.
        start_flat_strength: Blend strength in ``[0, 1]``.
        sg_window, sg_poly: Window and degree passed to ``savgol_manual``
            (``sg_window`` 0 disables it).
        ma_frac: Moving-average window as a fraction of the grid length
            (0 disables it).
        clip_min, clip_max: Output clipping limits.
        monotone: ``'nonincreasing'``, ``'nondecreasing'`` or anything else
            for no constraint.
        alpha, force_exact: Forwarded to ``apply_anchors_and_monotone_soft``.
        use_density_weights, density_bandwidth: Forwarded to
            ``fit_sigmoid_refined``.
        n_grid: Number of grid points.

    Returns:
        ``(xgrid, y_out)`` float arrays. Both are empty when no usable data
        remains. When all x are equal, the cleaned ``x`` and clipped ``y``
        are returned instead of a grid.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if x is None or y is None:
        return np.array([]), np.array([])
    x = np.asarray(x, float)
    y = np.asarray(y, float)
    if len(x) == 0 or len(y) == 0:
        return np.array([]), np.array([])
    if len(x) != len(y):
        n = min(len(x), len(y))
        x, y = x[:n], y[:n]

    # Drop NaN/inf pairs (e.g. blank spreadsheet cells): they would otherwise
    # poison the percentiles, the fits and the grid limits.
    finite = np.isfinite(x) & np.isfinite(y)
    if not np.all(finite):
        x, y = x[finite], y[finite]
        if len(x) == 0:
            return np.array([]), np.array([])

    # Sort and collapse duplicate x values (mean of their y).
    order = np.argsort(x)
    x, y = x[order], y[order]
    ux, first_idx, counts = np.unique(x, return_index=True, return_counts=True)
    if np.any(counts > 1):
        y = np.array([np.mean(y[i0:i0 + c]) for i0, c in zip(first_idx, counts)], dtype=float)
        x = ux

    # Monotone grid
    x_min, x_max = float(np.min(x)), float(np.max(x))
    if not np.isfinite(x_min) or not np.isfinite(x_max) or x_min == x_max:
        return x.copy(), np.clip(y.copy(), clip_min, clip_max)
    xgrid = np.linspace(x_min, x_max, int(n_grid))

    method = (method or "raw").strip().lower()
    is_parametric = method in ('sigmoid', 'sigmoid_refined', 'polynomial')

    # Fit -> y_base
    if method == 'sigmoid':
        p = fit_sigmoid(x, y)
        y_base = predict_sigmoid(p, xgrid)
    elif method == 'sigmoid_refined':
        p = fit_sigmoid_refined(x, y, use_weights=use_density_weights,
                                density_bandwidth=density_bandwidth)
        y_base = predict_sigmoid(p, xgrid)
    elif method == 'polynomial':
        coeff = np.polyfit(x, y, poly_deg)
        y_base = np.polyval(coeff, xgrid)
    elif method == 'loess':
        y_loess_at_x = fit_loess(x, y, frac=loess_frac)
        y_base = np.interp(xgrid, x, y_loess_at_x)
    elif method == 'raw':
        y_base = np.interp(xgrid, x, y)
    else:
        raise ValueError(f"Unknown method: {method}")

    # Initialization always present, whatever stages follow.
    y_out = y_base.copy()

    # Smoothing/plateau (only for non-parametric methods)
    if not is_parametric:
        if sg_window > 0:
            y_out = savgol_manual(y_out, window=sg_window, poly=sg_poly)
        if ma_frac > 0 and len(y_out) >= 3:
            ma_win = max(3, int(len(y_out) * ma_frac))
            if ma_win % 2 == 0:
                ma_win += 1
            pad = ma_win // 2
            ypad = np.pad(y_out, (pad, pad), mode='edge')
            kernel = np.ones(ma_win) / ma_win
            y_out = np.convolve(ypad, kernel, mode='valid')
        if start_flat_frac > 0 and start_flat_strength > 0 and len(y_out) > 4:
            x0, x1 = xgrid[0], xgrid[-1]
            xr = max(1.0, x1 - x0)
            thr = x0 + start_flat_frac * xr
            k = max(5, int(0.01 * len(y_out)))
            y_plateau = float(np.percentile(y_out[:k], 90))
            mask = xgrid <= thr
            if np.any(mask):
                # NOTE(review): this cosine weight is 0 at x0 and 1 at thr, so
                # the blend is strongest right at the threshold and stops
                # abruptly beyond it. Left unchanged; confirm this is intended.
                w = 0.5 * (1.0 + np.cos(np.pi * (xgrid[mask] - thr) / max(1e-9, thr - x0)))
                y_out[mask] = (1.0 - start_flat_strength * w) * y_out[mask] + (start_flat_strength * w) * y_plateau

    # Anchors + monotonicity ONLY if requested
    _do_monotone = monotone in ('nonincreasing', 'nondecreasing')
    _has_anchors = bool(anchors and len(anchors) > 0)
    if _do_monotone or _has_anchors:
        y_out = apply_anchors_and_monotone_soft(
            xgrid, y_out, anchors=(anchors or []),
            monotone=monotone, alpha=alpha, force_exact=force_exact
        )

    return xgrid, np.asarray(np.clip(y_out, clip_min, clip_max), float)




def get_upload_payload(upl):
    """Extract ``(name, content)`` of the first uploaded file from a widget.

    Two shapes of ``upl.value`` are understood (presumably the formats of
    different ipywidgets releases -- confirm against the installed version):

    * a non-empty tuple/list of objects exposing ``name`` and ``content``
      attributes (``name`` defaults to ``"uploaded.csv"``);
    * a non-empty dict mapping file name -> metadata dict with a
      ``"content"`` key.

    Returns:
        ``(name, content)``, or ``(None, None)`` when nothing is uploaded or
        the value has an unrecognised shape.
    """
    value = upl.value
    if value is None:
        return None, None

    if isinstance(value, (tuple, list)) and value:
        first = value[0]
        file_name = getattr(first, "name", "uploaded.csv")
        payload = getattr(first, "content", None)
        return file_name, payload

    if isinstance(value, dict) and value:
        file_name = next(iter(value))
        payload = value[file_name].get("content")
        return file_name, payload

    return None, None

def read_last_sheet_from_bytes(xlsx_bytes):
    """Return ``(df, sheet_name)`` for the LAST non-empty sheet of a workbook.

    Sheets are scanned from last to first and the first one with at least one
    data row is returned. If every sheet is empty, the last sheet is returned
    anyway.

    Args:
        xlsx_bytes: Bytes-like content of the Excel file.

    Returns:
        ``(DataFrame, sheet_name)``.
    """
    # The context manager closes the workbook handle on every exit path.
    with pd.ExcelFile(io.BytesIO(xlsx_bytes)) as xls:
        sheet_names = xls.sheet_names
        last_df = None
        for name in reversed(sheet_names):  # start from the last sheet
            df = pd.read_excel(xls, sheet_name=name)
            if last_df is None:
                last_df = df  # remembered so the fallback need not re-parse
            if len(df) > 0:
                return df, name
        # Fallback: the last sheet even though it is empty.
        return last_df, sheet_names[-1]




# ============================================================================

# DOSE CALCULATION FUNCTION

# ============================================================================




def calculate_cumulative_doses(df, params):
    """Add time base, nominal exposure and efficiency-weighted capacity columns.

    The total upstream time (``Hours_up_per_day * Days_real``) is split evenly
    across the rows of ``df``. For each row the function adds:

    * ``Hours_up_per_row`` and ``Cum_up_hours`` (row position based);
    * ``Exposure_step_*`` = concentration * hours per row and their running
      totals ``Exposure_Upstream``, ``Exposure_Std1``, ``Exposure_Std2``
      (NOT weighted by efficiency);
    * ``CapUsed_step_up`` = upstream step * ``Eff``/100 (clipped to [0, 1])
      and its running total ``CapUsed_Upstream``.

    Args:
        df: DataFrame with an ``'Eff'`` column (percent). It is not modified.
        params: Mapping with keys ``'Hours_up_per_day'``, ``'Days_real'``,
            ``'Upstream_Conc'``, ``'Std_Conc_1'``, ``'Std_Conc_2'``.

    Returns:
        ``(df_out, summary)`` where ``summary`` has ``'Total_UP_hours'``,
        ``'N_microloops'`` and ``'Dose_Total_upstream'`` (the final nominal
        upstream exposure, 0.0 for an empty frame).

    Raises:
        KeyError: If ``'Eff'`` is missing from ``df`` or a key is missing
            from ``params``.
    """
    if 'Eff' not in df.columns:
        raise KeyError("Colonna 'Eff' mancante: rinomina 'eff' -> 'Eff' prima del calcolo.")

    df_out = df.copy()

    # --- parameters ---
    h_day = params['Hours_up_per_day']
    days = params['Days_real']
    up_c = params['Upstream_Conc']
    std1 = params['Std_Conc_1']
    std2 = params['Std_Conc_2']

    total_h = h_day * days
    n_rows = len(df_out)
    h_per_row = total_h / n_rows if n_rows > 0 else 0.0

    # Time base. Use a positional counter rather than the index: a frame with
    # a non-default index (filtered, re-labelled) would otherwise get wrong
    # cumulative hours.
    df_out['Hours_up_per_row'] = h_per_row
    df_out['Cum_up_hours'] = h_per_row * np.arange(1, n_rows + 1)

    # --------- 1) Nominal exposure (NOT weighted by Eff) ---------
    exposures = (
        ('Exposure_step_up', 'Exposure_Upstream', up_c),
        ('Exposure_step_s1', 'Exposure_Std1', std1),
        ('Exposure_step_s2', 'Exposure_Std2', std2),
    )
    # Two passes keep the original column order (all steps, then all totals).
    for step_col, _total_col, conc in exposures:
        df_out[step_col] = conc * h_per_row
    for step_col, total_col, _conc in exposures:
        running = df_out[step_col].cumsum().to_numpy()
        # Running maximum guards against numerically decreasing totals.
        df_out[total_col] = np.maximum.accumulate(running)

    # --------- 2) Capacity used (WEIGHTED by Eff) ---------
    eff_frac = np.clip(df_out['Eff'].to_numpy(dtype=float) / 100.0, 0.0, 1.0)
    df_out['CapUsed_step_up'] = up_c * h_per_row * eff_frac
    df_out['CapUsed_Upstream'] = np.maximum.accumulate(
        df_out['CapUsed_step_up'].cumsum().to_numpy()
    )

    summary = {
        'Total_UP_hours': float(total_h),
        'N_microloops': int(n_rows),
        # The "total dose" is the nominal exposure (e.g. 20h * 19d * 400 = 152000).
        'Dose_Total_upstream': float(df_out['Exposure_Upstream'].iloc[-1]) if n_rows > 0 else 0.0,
    }
    return df_out, summary

# ============================================================================

# PART 2: MAIN CODE, UI & EXECUTION

# Copy this AFTER Part 1 in your Jupyter notebook

# ============================================================================




# IPython magic: valid only inside a Jupyter/IPython cell, not in a plain
# Python module.
%matplotlib widget

import io, os, base64, numpy as np, matplotlib.pyplot as plt

from IPython.display import display, HTML

import matplotlib

# Fallback when the widget backend is not active: switch to the inline backend.
# NOTE(review): the test only looks for the substring 'widget' in the backend
# name; if the active interactive backend reports a name without that
# substring, this will still switch to inline -- confirm against the
# installed matplotlib/ipympl versions.

if 'widget' not in matplotlib.get_backend().lower():

    matplotlib.use('module://matplotlib_inline.backend_inline')

import ipywidgets as widgets

import warnings, re

# Silences every warning category for the rest of the session.
warnings.filterwarnings("ignore")




# --- XLSX builders & download link helpers ---

def _unique_sheet_title(raw_name, taken):
    """Return an Excel-safe sheet title that is not yet in ``taken``.

    ``taken`` is a set of lower-cased titles already used; the new title is
    added to it. Titles are limited to 31 characters of ``[A-Za-z0-9_]``.
    """
    import re

    base = re.sub(r"[^A-Za-z0-9_]", "_", str(raw_name))[:31] or "Sheet1"
    title, counter = base, 1
    while title.lower() in taken:
        suffix = f"_{counter}"
        title = base[:31 - len(suffix)] + suffix
        counter += 1
    taken.add(title.lower())
    return title


def build_xlsx_bytes(sheets: dict) -> bytes:
    """Build an in-memory XLSX file from ``{sheet_name: DataFrame}``.

    Engines are tried in order: 1) openpyxl, 2) xlsxwriter. Sheet names are
    sanitised to ``[A-Za-z0-9_]``, truncated to 31 characters, and made
    unique (case-insensitively) by appending ``_1``, ``_2``, ... so two names
    that sanitise to the same title no longer end up on the same sheet.

    Args:
        sheets: Mapping of sheet name -> DataFrame, written without index.

    Returns:
        The XLSX file content as bytes.

    Raises:
        RuntimeError: If neither engine can be imported.
    """
    import io
    import pandas as pd

    engines = ["openpyxl", "xlsxwriter"]
    last_err = None

    for eng in engines:
        bio = io.BytesIO()  # fresh buffer per attempt
        taken = set()
        try:
            with pd.ExcelWriter(bio, engine=eng) as xl:
                for name, df in sheets.items():
                    df.to_excel(xl, sheet_name=_unique_sheet_title(name, taken), index=False)
            return bio.getvalue()
        except ImportError as e:
            # Covers ModuleNotFoundError too: this engine is not installed.
            last_err = e
            continue

    raise RuntimeError(
        "No Excel engine available. Please install either 'openpyxl' or 'xlsxwriter'. "
        f"Last error: {last_err}"
    )




def make_download_widget(xlsx_bytes: bytes, filename: str, label: str="Download XLSX"):
    """Return a ``widgets.HTML`` holding a base64 data-URI link to the XLSX.

    Args:
        xlsx_bytes: File content to embed in the link.
        filename: Name suggested to the browser via the ``download`` attribute.
        label: Visible link text.

    Returns:
        An ``ipywidgets.HTML`` widget containing a single ``<a>`` element.
    """
    import base64
    import html
    import ipywidgets as widgets

    b64 = base64.b64encode(xlsx_bytes).decode()
    # filename and label come from user-editable text; escape them so quotes
    # or angle brackets cannot break out of the attribute or inject markup.
    safe_name = html.escape(str(filename), quote=True)
    safe_label = html.escape(str(label))
    return widgets.HTML(
        f'<a download="{safe_name}" href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" '
        f'style="font-weight:600;text-decoration:none;">📥 {safe_label}</a>'
    )

# ============================================================================

# UI WIDGETS

# ============================================================================




# Module-level ipywidgets controls. The visible part of on_run reads the
# upload, dose-parameter, fit, anchor, colour and legend widgets below; the
# consumers of the remaining widgets (Y limits, time-series tuning, auto-run,
# download buttons/outputs, output name) are not visible here -- presumably
# further down in this file.

# --- Input file and dose parameters ---
uploader = widgets.FileUpload(accept='.csv,.txt,.xlsx', multiple=False)

hours_up_day_w = widgets.FloatText(value=20, description='Hours UP/Day')

days_real_w = widgets.FloatText(value=45, description='Days real')

upstream_conc_w = widgets.FloatText(value=400, description='Upstream ppbv')

std_conc_1_w = widgets.FloatText(value=10, description='Std #1 ppbv')

std_conc_2_w = widgets.FloatText(value=15, description='Std #2 ppbv')




# --- Fit method and its tuning knobs ---
# The dropdown values are the method names accepted by run_pipeline_from_arrays.
method_w = widgets.Dropdown(

    options=[

        ('Polynomial fit', 'polynomial'),

        ('LOESS smoothing', 'loess'),

        ('Sigmoid (refined)', 'sigmoid_refined'),

        ('Sigmoid (basic)', 'sigmoid'),

        ('Raw interpolation', 'raw')

    ],

    value='sigmoid_refined',

    description='Method'

)

poly_deg_w = widgets.BoundedIntText(value=3, min=1, max=10, description='Poly deg')

loess_frac_w = widgets.FloatSlider(value=0.25, min=0.05, max=0.9, step=0.05, description='Loess')

sg_window_w = widgets.IntSlider(value=0, min=0, max=101, step=1, description='SG win')

sg_poly_w = widgets.BoundedIntText(value=2, min=1, max=5, description='SG poly')

ma_frac_w = widgets.FloatSlider(value=0.0, min=0.0, max=0.2, step=0.005, description='MA frac')

alpha_w = widgets.FloatSlider(value=0.6, min=0.0, max=1.0, step=0.05, description='Anchor α')

# NOTE(review): the initial value 0.02 is not a multiple of step=0.05 --
# confirm how the slider treats it.
start_flat_frac_w = widgets.FloatSlider(value=0.02, min=0.0, max=1.0, step=0.05, description='Flat frac')

start_flat_strength_w = widgets.FloatSlider(value=0.35, min=0.0, max=1.0, step=0.05, description='Flat str')

# Free-text anchors; on_run parses this string with parse_anchors.
anchors_w = widgets.Text(value='', description='Anchors', placeholder='x1,y1; x2,y2 (optional)')

monotone_w = widgets.Dropdown(options=['none','nonincreasing','nondecreasing'], value='none', description='Monotone')

clip_min_w = widgets.FloatSlider(value=0.0, min=0.0, max=100.0, description='Clip min')

clip_max_w = widgets.FloatSlider(value=100.0, min=0.0, max=100.0, description='Clip max')

legend_label_w = widgets.Text(value='Fitted curve', description='Legend label:')




# --- Run controls and log/output area ---
output_name_w = widgets.Text(value='efficiency_output', description='Output')

run_btn = widgets.Button(description='▶ RUN', button_style='success')

out_box = widgets.Output()




# --- Plot appearance ---
raw_color_w   = widgets.ColorPicker(value='#1f77b4', description='Raw')

trend_color_w = widgets.ColorPicker(value='#d62728', description='Trend')

anchor_color_w= widgets.ColorPicker(value='#FFD700', description='Anchor')

show_points_w = widgets.Checkbox(value=True, description='Show points')




# Live tuning / axes

auto_run_w = widgets.Checkbox(value=False, description='Auto-RUN on change')

ymin_w     = widgets.FloatSlider(value=70, min=0, max=100, step=1.0, description='Y min (%)')

ymax_w     = widgets.FloatSlider(value=100, min=0, max=100, step=1.0, description='Y max (%)')




# Density-based weighting

use_density_w       = widgets.Checkbox(value=True, description='Use density weighting')

density_bandwidth_w = widgets.FloatSlider(value=0.10, min=0.01, max=0.50, step=0.01, description='Density BW')




# Time-series tuning

ts_sg_window_w = widgets.IntSlider(value=0, min=0, max=101, step=1, description='TS SG win')

ts_ma_frac_w   = widgets.FloatSlider(value=0.00, min=0.0, max=0.2, step=0.005, description='TS MA frac')




# --- Download buttons & output areas (UI) ---

btn_dl_left  = widgets.Button(description='⬇ Download left plot data (XLSX)', button_style='')

btn_dl_right = widgets.Button(description='⬇ Download right plot data (XLSX)', button_style='')




dl_left_out  = widgets.Output(layout={'border': '1px solid #eee'})

dl_right_out = widgets.Output(layout={'border': '1px solid #eee'})

# When checked, on_run passes force_exact=True so anchors are pinned exactly.
force_exact_w = widgets.Checkbox(value=False, description='Anchors: exact pin')

# ============================================================================

# MAIN FUNCTION

# ============================================================================




def on_run(_):

    with out_box:

        out_box.clear_output()

        print("🚀 Running...")

        export_df = None

        name, content = get_upload_payload(uploader)

        if not content:

            print("⚠️ Upload file first")

            return




        try:

            print(f"📄 Reading: {name}")

            is_eff = False

            df_eff = None

            x_data = None

            y_data = None




            # === FILE READING ===

            if name.endswith(('.xlsx', '.xls')):

                df_eff, last_sheet = read_last_sheet_from_bytes(content)

                print(f"📑 Using last sheet: '{last_sheet}'")

                df_eff.columns = [c.strip() for c in df_eff.columns]

                if 'eff' in [c.lower() for c in df_eff.columns]:

                    df_eff.rename(columns={next(c for c in df_eff.columns if c.lower()=='eff'): 'Eff'}, inplace=True)

                    assert 'Eff' in df_eff.columns or 'Eff' not in [c.strip() for c in df_eff.columns], "Eff non presente dopo normalizzazione"

                if 'Eff' in df_eff.columns:

                    is_eff = True

                    print("✅ Efficiency data (Excel)")

                else:

                    x_data = df_eff.iloc[:, 0].values

                    y_data = df_eff.iloc[:, 1].values

                    print("✅ 2-col data")                    

            else:

                csv_text = content.decode('utf-8', errors='ignore') if hasattr(content, 'decode') else io.BytesIO(content).read().decode('utf-8', 'ignore')

                try:

                    df_test = pd.read_csv(io.StringIO(csv_text), sep='\t', nrows=5)

                    if 'eff' in [c.lower() for c in df_test.columns]:

                        df_eff = pd.read_csv(io.StringIO(csv_text), sep='\t')

                        # --- NORMALIZZAZIONE COLONNE EFFICIENCY (sempre) ---

                        df_eff.columns = [c.strip() for c in df_eff.columns]

                        if 'eff' in [c.lower() for c in df_eff.columns]:

                            df_eff.rename(columns={next(c for c in df_eff.columns if c.lower()=='eff'): 'Eff'}, inplace=True)

                            assert 'Eff' in df_eff.columns, "Eff non presente dopo normalizzazione (tab)"

                        is_eff = True

                        print("✅ Efficiency data (tab)")

                    else:

                        x_data, y_data = read_two_cols_anysep(csv_text, True)

                        print("✅ 2-col data")

                except:

                    x_data, y_data = read_two_cols_anysep(csv_text, True)

                    print("✅ 2-col data")




            # === DATA PREPARATION ===

            if is_eff and df_eff is not None:

                req = ['loop', 'up', 'down1', 'zeroair', 'c_in', 'c_out', 'Eff']

                if any(c not in df_eff.columns for c in req):

                    missing = [c for c in req if c not in df_eff.columns]

                    print(f"❌ Missing cols: {missing}")

                    return

            

                # 1) Calcolo dosi cumulative (QUI mancava la chiamata)

                params = {

                    'Hours_up_per_day': hours_up_day_w.value,

                    'Days_real':        days_real_w.value,

                    'Upstream_Conc':    upstream_conc_w.value,

                    'Std_Conc_1':       std_conc_1_w.value,

                    'Std_Conc_2':       std_conc_2_w.value

                }

                df_result, summary = calculate_cumulative_doses(df_eff, params)

            

                # 2) Summary (chiavi corrette, rimosso la parentesi extra)

                print(f"✅ Total hrs: {summary['Total_UP_hours']:.1f} | Loops: {summary['N_microloops']} | Dose: {summary['Dose_Total_upstream']:.0f}")

            

                # 3) Serie per fit (campi con maiuscole corretti)

                x_data = df_result['Exposure_Upstream'].values  # dose nominale per il fit

                y_data = df_result['Eff'].values

                x_raw = x_data.copy()

                y_raw = y_data.copy()

            

                # 4) Monotonia dose (assert facoltativo)

                assert np.all(np.diff(x_data) >= -1e-9), "Dose_Upstream non monotona!"

            else:

                print(f"✅ {len(x_data)} points")

                x_raw = x_data.copy()

                y_raw = y_data.copy()

                df_result = None




            # === FIT ===

            print(f"🎨 Fitting {method_w.value} on {len(x_data)} points...")

            if use_density_w.value:

                print(f"   Using density weighting (bandwidth={density_bandwidth_w.value:.2f})")

            x_fit, y_fit = run_pipeline_from_arrays(

                x=x_data, y=y_data,

                method=method_w.value,

                poly_deg=poly_deg_w.value,

                loess_frac=loess_frac_w.value,

                anchors=parse_anchors(anchors_w.value),

                sg_window=sg_window_w.value, sg_poly=sg_poly_w.value, ma_frac=ma_frac_w.value,

                clip_min=clip_min_w.value, clip_max=clip_max_w.value,

                monotone=(monotone_w.value if monotone_w.value != 'none' else 'none'),

                start_flat_frac=start_flat_frac_w.value,

                start_flat_strength=start_flat_strength_w.value,

                alpha=alpha_w.value,

                force_exact=force_exact_w.value,       

                use_density_weights=use_density_w.value,

                density_bandwidth=density_bandwidth_w.value

            )

            print("✅ Done")

            # --- Anchor integration (make them part of RAW and FIT data) ---

            _anc = parse_anchors(anchors_w.value)

            has_anc = bool(_anc)

            

            # 1) FIT augmented: aggiungi gli x degli anchor alla griglia fit e interpola

            x_fit_aug, y_fit_aug = x_fit, y_fit

            if has_anc:

                anc_x = np.array([a[0] for a in _anc], dtype=float)

                x_fit_aug = np.sort(np.unique(np.r_[x_fit, anc_x]))

                y_fit_aug = np.interp(x_fit_aug, x_fit, y_fit)

            

                # se "exact pin" è attivo, impone Y=anchor anche nell'array fittato

                if force_exact_w.value:

                    for xa, ya in _anc:

                        i = int(np.searchsorted(x_fit_aug, xa))

                        i = max(0, min(i, len(x_fit_aug)-1))

                        if np.isclose(x_fit_aug[i], xa, rtol=0, atol=1e-12):

                            y_fit_aug[i] = ya

            

            # 2) RAW augmented: inserisci una riga raw per ogni anchor mancante

            x_raw_aug = x_raw.copy()

            y_raw_aug = y_raw.copy()

            is_anchor_aug = np.zeros_like(x_raw_aug, dtype=bool)

            

            if has_anc:

                import pandas as pd

                df_raw = pd.DataFrame({"x": x_raw_aug, "y": y_raw_aug, "is_anchor": is_anchor_aug})

                for xa, ya in _anc:

                    if not np.any(np.isclose(df_raw["x"].values, xa, rtol=0, atol=1e-9)):

                        df_raw = pd.concat(

                            [df_raw, pd.DataFrame({"x":[xa], "y":[ya], "is_anchor":[True]})],

                            ignore_index=True

                        )

                    else:

                        idx = np.where(np.isclose(df_raw["x"].values, xa, rtol=0, atol=1e-9))[0][0]

                        df_raw.loc[idx, "is_anchor"] = True

                        # Se vuoi anche forzare il valore raw alla Y anchor, sblocca la riga sotto:

                        # df_raw.loc[idx, "y"] = ya

            

                df_raw = df_raw.sort_values("x", kind="mergesort").reset_index(drop=True)

                df_raw = df_raw.groupby("x", as_index=False).apply(

                    lambda g: g.iloc[np.argmax(g["is_anchor"].values)] if (g["is_anchor"].any()) else g.iloc[0]

                ).reset_index(drop=True)

            

                x_raw_aug = df_raw["x"].to_numpy(dtype=float)

                y_raw_aug = df_raw["y"].to_numpy(dtype=float)

                is_anchor_aug = df_raw["is_anchor"].to_numpy(dtype=bool)

            

            # 3) Valori di fit sulle X RAW augmented (coerenti con la curva finale)

            fit_on_raw_aug = np.interp(

                x_raw_aug, x_fit_aug, y_fit_aug,

                left=y_fit_aug[0], right=y_fit_aug[-1]

            )

            # === PLOT & EXPORT (full block aligned) ===

            

            # 1) Fit su dose->efficienza (x_fit, y_fit) già calcolati sopra

            #    NB: x_fit è una griglia monotona (run_pipeline_from_arrays), y_fit la curva fitted.

            if len(x_fit) == 0 or len(y_fit) == 0:

                print("⚠️ Fit vuoto: controlla dati/metodo di fitting.")

                return

            

            # 2) Prepara dati per il pannello Tempo: mappa la curva fitted sulle dosi reali nel tempo

            if is_eff and df_result is not None:

                time_days   = df_result['Cum_up_hours'].values / 24.0

                dose_series = df_result['Exposure_Upstream'].values

                # efficienza fitted alle dosi reali (interpolazione sicura su x_fit monotona)

                eff_over_time = np.interp(dose_series, x_fit, y_fit)

            else:

                time_days   = np.array([])

                dose_series = np.array([])

                eff_over_time = np.array([])

            # === dose → days mapping function ===

            # Questa funzione sarà usata negli export per ottenere "time_fit_days"

            if is_eff and len(dose_series) > 0:

                def _dose_to_days(arr):

                    return np.interp(arr, dose_series, time_days,

                                     left=time_days[0], right=time_days[-1])

            else:

                # fallback: se non abbiamo la mappa reale, usiamo un fattore nominale

                def _dose_to_days(arr):
                    """Convert doses to days with a nominal dose-per-day factor read from the widgets."""
                    # ppbv·h per day = upstream concentration value × upstream hours-per-day value
                    per_day = float(upstream_conc_w.value) * float(hours_up_day_w.value)
                    if not per_day > 0:
                        # anything that is not strictly positive falls back to 1.0
                        per_day = 1.0
                    return np.asarray(arr, float) / per_day

            # 3) Layout: 2 pannelli allineati

            plt.close('all')

            print("📈 Plotting...")

            

            import matplotlib.gridspec as gridspec

            fig = plt.figure(figsize=(12, 5))

            # disattiva qualsiasi layout engine automatico che potrebbe tagliare i bordi

            try:

                fig.set_layout_engine(None)

            except Exception:

                pass

            

            gs  = gridspec.GridSpec(1, 2, figure=fig, width_ratios=[1, 1])

            axA = fig.add_subplot(gs[0, 0])

            axB = fig.add_subplot(gs[0, 1])

            

            # margini espliciti: molto più spazio a destra per label + ticks dell’asse destro

            plt.subplots_adjust(left=0.08, right=0.88, top=0.92, bottom=0.15, wspace=0.30)

            

            # Panel A: Efficiency vs Dose

            if show_points_w.value:

                axA.scatter(

                    x_raw_aug, y_raw_aug,

                    s=12, alpha=0.5, color=raw_color_w.value, label='Raw'

                )

                if has_anc and np.any(is_anchor_aug):

                    axA.scatter(

                        x_raw_aug[is_anchor_aug], y_raw_aug[is_anchor_aug],

                        s=40, marker='o', color=anchor_color_w.value,

                        edgecolors='k', linewidths=0.5, zorder=5, label='Anchors'

                    )

            

            # curva fit “augmented”

            axA.plot(

                x_fit_aug, y_fit_aug,

                lw=2.2, color=trend_color_w.value, label=legend_label_w.value

            )

            

            axA.set_xlabel(f'Cumulative Dose @ {int(upstream_conc_w.value)} ppbv (ppbv·h)')

            # Actually call set_ylabel: the bare attribute reference was a no-op and left
            # panel A without a Y-axis label. Same label constant as the time panel.
            axA.set_ylabel(EFFICIENCY_LABEL)

            axA.set_title('Efficiency vs Dose', fontweight='bold')

            axA.grid(alpha=0.3)

            axA.legend(loc='lower left')

            

            # Applica i limiti Y dai controlli

            y0 = float(min(ymin_w.value, ymax_w.value))

            y1 = float(max(ymin_w.value, ymax_w.value))

            if y1 - y0 < 1e-6:

                y1 = y0 + 1.0

            axA.set_ylim(y0, y1)

                

            # Panel B: Time evolution (con smoothing tunabile)

            axB.set_title('Time Evolution', fontweight='bold')

            

            # fallback: nessun plot se non ho dati temporali

            if time_days.size == 0:

                                # ...existing code...
                def esegui_routine(items: list[str]) -> list[str]:
                    """
                    Normalize a list of strings.

                    Every entry is stripped of surrounding whitespace, entries that
                    end up empty are dropped, and repeated values are removed while
                    keeping the order in which they were first seen.
                    """
                    stripped = (entry.strip() for entry in items)
                    # dict keys keep insertion order -> order-preserving de-duplication
                    return list(dict.fromkeys(text for text in stripped if text))
                
                if __name__ == "__main__":
                    esempio = ["  uno", "due", "uno ", " ", "tre"]
                    print(esegui_routine(esempio))
                # ...existing code...

            else:

                eff_plot  = eff_over_time.copy()

                dose_plot = dose_series.copy()

            

                # Savitzky-Golay (se >0 e finestra dispari)

                if ts_sg_window_w.value > 2:

                    win = int(ts_sg_window_w.value)

                    if win % 2 == 0: win += 1

                    eff_plot = savgol_manual(eff_plot, window=win, poly=min(3, sg_poly_w.value))

            

                # Moving average efficienza

                if ts_ma_frac_w.value > 0 and len(eff_plot) >= 3:

                    m = max(3, int(len(eff_plot) * ts_ma_frac_w.value))

                    if m % 2 == 0: m += 1

                    pad = m // 2

                    kern = np.ones(m) / m

                    eff_plot = np.convolve(np.pad(eff_plot, (pad, pad), mode='edge'), kern, mode='valid')

            

            

                # --- Mantiene lunghezze coerenti dopo smoothing ---

                n = min(len(time_days), len(eff_plot), len(dose_plot))

                time_use = time_days[:n]

                eff_plot  = eff_plot[:n]

                dose_plot = dose_plot[:n]

            

                # ---------- Primo asse: efficienza ----------

                axB.plot(time_use, eff_plot, '-', lw=2.0, color='#2ca02c', label=EFFICIENCY_LABEL)

                axB.set_xlabel('Time (days)')

                axB.set_ylabel(EFFICIENCY_LABEL, color='#2ca02c')

                axB.tick_params(axis='y', labelcolor='#2ca02c')

                

                # X con margine 5%

                x0, x1 = float(time_use[0]), float(time_use[-1])

                pad = 0.05 * (x1 - x0) if x1 > x0 else 0.5

                axB.set_xlim(x0 - pad, x1 + pad)             

                

                # Applica i limiti Y anche al pannello tempo

                axB.set_ylim(y0, y1)

  

                axB.grid(alpha=0.3)

                            

            plt.show()

            

                        # ---------- Download handlers (placed after plt.show()) ----------

            safe = "".join(c for c in output_name_w.value if c.isalnum() or c in ("-","_",".")) or "output"




            # evita doppie bind se RUN viene premuto più volte

            try:

                btn_dl_left._click_handlers.callbacks = []

                btn_dl_right._click_handlers.callbacks = []

            except Exception:

                pass

            

            def _on_dl_left(_):
                """Click handler for the left download button (efficiency-vs-dose data).

                Builds an "eff_vs_dose" sheet (raw dose, raw efficiency, fit evaluated at
                the raw doses, anchor flag, dose mapped to days) plus an optional
                "fit_grid" sheet, passes them to ``build_xlsx_bytes`` and displays the
                object returned by ``make_download_widget`` inside ``dl_left_out``.
                Any exception is caught and its traceback printed in that output area.

                The positional argument (the clicked button) is ignored.
                """

                import pandas as pd, traceback

                with dl_left_out:

                    dl_left_out.clear_output()

                    try:

                        # guard: minimum required data
                        # NOTE(review): inside a nested function locals() only reports
                        # enclosing-scope names that this body references and that are
                        # already bound — confirm these checks behave as intended.

                        if 'x_raw_aug' not in locals() or 'y_raw_aug' not in locals() or len(x_raw_aug) == 0:

                            display(HTML("<em>No left-plot raw data available.</em>"))

                            return

                        # Only a notice is shown here: execution continues and
                        # fit_on_raw_aug is still written into the sheet below.

                        if 'x_fit_aug' not in locals() or 'y_fit_aug' not in locals() or len(x_fit_aug) == 0:

                            display(HTML("<em>No fitted curve available. Exporting raw only.</em>"))



                        # single sheet: RAW + FIT (at the same X as the raw points) + anchor flag + time

                        cols = {

                            "dose_ppbv_h": x_raw_aug,

                            "eff_raw_pct": y_raw_aug,

                            "eff_fit_pct": fit_on_raw_aug,

                            "is_anchor":   is_anchor_aug,

                        }



                        # map dose -> days for the RAW points (if the real series is available)

                        try:

                            cols["time_at_raw_days"] = _dose_to_days(x_raw_aug)

                        except Exception:

                            # fallback: nominal dose per day; `or 1.0` avoids dividing by a zero product

                            nominal = float(upstream_conc_w.value) * float(hours_up_day_w.value) or 1.0

                            cols["time_at_raw_days"] = np.asarray(x_raw_aug, float) / nominal



                        df_left = pd.DataFrame(cols)



                        sheets = {"eff_vs_dose": df_left}



                        # (optional) also add the curve on the fit grid, for debugging

                        if 'x_fit_aug' in locals() and len(x_fit_aug) and len(y_fit_aug):

                            sheets["fit_grid"] = pd.DataFrame({

                                "dose_grid_ppbv_h": x_fit_aug,

                                "eff_fit_pct": y_fit_aug,

                                "time_fit_days": _dose_to_days(x_fit_aug)

                            })



                        xlsx_bytes = build_xlsx_bytes(sheets)

                        display(make_download_widget(

                            xlsx_bytes,

                            f"{safe}_eff_vs_dose.xlsx",

                            "Download left plot data (XLSX)"

                        ))



                    except Exception:

                        # best-effort export: report the failure in the output area instead of raising

                        print("Export error (left):")

                        traceback.print_exc()

            

            def _on_dl_right(_):
                """Click handler for the right download button (time-panel data).

                Exports a "time_series" sheet with ``time_use`` and ``eff_plot`` (clipped
                to the range 0..100) through ``build_xlsx_bytes`` and displays the object
                returned by ``make_download_widget`` inside ``dl_right_out``. Any
                exception is caught and its traceback printed in that output area.

                The positional argument (the clicked button) is ignored.
                """

                import pandas as pd, traceback

                with dl_right_out:

                        dl_right_out.clear_output()

                        try:

                            # NOTE(review): inside a nested function locals() only reports
                            # enclosing-scope names that this body references and that are
                            # already bound — confirm this guard behaves as intended when
                            # the time panel was not drawn.

                            if 'time_use' not in locals() or 'eff_plot' not in locals() or len(time_use) == 0:

                                display(HTML("<em>No time-series available for export.</em>"))

                                return



                            sheets = {

                                "time_series": pd.DataFrame({

                                    "time_days":       time_use,

                                    "eff_fit_percent": np.clip(eff_plot, 0, 100)

                                })

                            }

                            xlsx_bytes = build_xlsx_bytes(sheets)

                            display(make_download_widget(

                                xlsx_bytes,

                                f"{safe}_time_panel.xlsx",

                                "Download right plot data (XLSX)"

                            ))

                        except Exception:

                            # best-effort export: report the failure in the output area instead of raising

                            print("Export error (right):")

                            traceback.print_exc()

            

            # (re)bind handler

            btn_dl_left.on_click(_on_dl_left)

            btn_dl_right.on_click(_on_dl_right)

            

            # render pulsanti/box (se non lo fai altrove)

            display(

                widgets.HBox([

                    widgets.VBox([btn_dl_left,  dl_left_out],  layout=widgets.Layout(width='50%')),

                    widgets.VBox([btn_dl_right, dl_right_out], layout=widgets.Layout(width='50%'))

                ])

            )

            # 5) EXPORT – struttura tabellare unica

            if is_eff and df_result is not None:

                # Costruisco la tabella esperimento per esperimento

                df_export = df_result.copy()

                if 'loop' in df_export.columns:

                    df_export = df_export.drop(columns=['loop'])

                df_export = df_export.reset_index().rename(columns={'index': 'loop'})

                df_export['time_days'] = df_export['Cum_up_hours'] / 24.0

                

                # Efficienza fittata per ogni riga

                df_export['eff_smooth'] = np.interp(

                    df_export['Exposure_Upstream'].values,

                    x_fit, y_fit,

                    left=y_fit[0], right=y_fit[-1]

                )

                

                # Backward-compat: duplica Exposure_Upstream in Dose_Upstream se serve altrove

                df_export['Dose_Upstream'] = df_export['Exposure_Upstream']

                

                # Ridenomina per output pulito

                df_export = df_export.rename(columns={

                    'loop':             'Loop',

                    'Cum_up_hours':     'Cum_Up_Hours',

                    'time_days':        'Time_Days',

                    'eff_smooth':       'Eff_Smooth',

                    'Exposure_Upstream':'Exposure_Upstream',

                    'Dose_Std1':        'Dose_Std1',

                    'Dose_Std2':        'Dose_Std2',

                    'Eff':              'Eff'

                })

                # --- Retro-compat: crea le colonne Dose_Std1/Dose_Std2 se non esistono ---

                if 'Dose_Std1' not in df_export.columns:

                    if 'Exposure_Std1' in df_export.columns:

                        df_export['Dose_Std1'] = df_export['Exposure_Std1']

                    else:

                        df_export['Dose_Std1'] = np.nan

                

                if 'Dose_Std2' not in df_export.columns:

                    if 'Exposure_Std2' in df_export.columns:

                        df_export['Dose_Std2'] = df_export['Exposure_Std2']

                    else:

                        df_export['Dose_Std2'] = np.nan

                # Ordine colonne (includo sia Exposure_Upstream sia Dose_Upstream per compatibilità)

                cols = [

                    'Loop', 'Time_Days', 'Cum_Up_Hours',

                    'Exposure_Upstream', 'Dose_Upstream',

                    'Exposure_Std1', 'Exposure_Std2',

                    'Eff', 'Eff_Smooth'

                ]

                export_df = df_export[cols]

            else:

                # Se il file NON è "efficiency", esporto solo la curva fittata (minimo indispensabile)

                export_df = pd.DataFrame({

                    'series': ['fit_dose_eff'] * len(x_fit),

                    'x': x_fit,

                    'y': y_fit

                })

        

            # === Salvataggio finale CSV ===

            csv_out = export_df.to_csv(index=False)

            b64 = base64.b64encode(csv_out.encode()).decode()

            safe = "".join(c for c in output_name_w.value if c.isalnum() or c in ("-","_",".")) or "output"

            display(HTML(

                f'<a download="{safe}.csv" href="data:text/csv;base64,{b64}" '

                f'style="font-size:14px;font-weight:bold">📥 Download {safe}.csv</a>'

            ))

            print(f"✅ Complete! {len(export_df)} rows")

        

        except Exception as e:

            import traceback

            print("❌ Error:")

            traceback.print_exc()

# Invoke on_run whenever run_btn is clicked.
run_btn.on_click(on_run)

        

# fuori da on_run

import time

# Monotonic timestamp of the last auto-run that was let through.
# Starting at -inf guarantees the very first change is never suppressed.
_last_call = {'t': float('-inf')}


def _maybe_auto_run(change):
    """Re-run the pipeline after a widget change, at most once every 150 ms.

    Used as the ``observe`` callback of the auto-run widgets. Does nothing when
    the ``auto_run_w`` toggle is off, or when the previous accepted call happened
    less than 150 ms ago (such calls are dropped, not deferred).

    Args:
        change: the change notification passed by ``observe``; not inspected.
    """
    if not auto_run_w.value:
        return

    # time.monotonic() is immune to system clock adjustments (NTP, DST, manual
    # changes), which could otherwise block or bypass the rate limit.
    now = time.monotonic()
    if now - _last_call['t'] < 0.15:   # 150 ms debounce
        return

    _last_call['t'] = now
    on_run(None)




# Widgets whose value changes trigger an automatic re-run (through _maybe_auto_run).
# NOTE(review): ts_sg_window_w, ts_ma_frac_w and legend_label_w appear in the UI
# layout but are not observed here — confirm whether that is intentional.
for _w in (
    method_w,
    poly_deg_w,
    loess_frac_w,
    sg_window_w,
    sg_poly_w,
    ma_frac_w,
    start_flat_frac_w,
    start_flat_strength_w,
    anchors_w,
    alpha_w,
    monotone_w,
    clip_min_w,
    clip_max_w,
    use_density_w,
    density_bandwidth_w,
    hours_up_day_w,
    days_real_w,
    upstream_conc_w,
    std_conc_1_w,
    std_conc_2_w,
    show_points_w,
    raw_color_w,
    trend_color_w,
    anchor_color_w,
    force_exact_w,
    ymin_w,
    ymax_w,
):
    _w.observe(_maybe_auto_run, names='value')




# ============================================================================

# UI LAYOUT

# ============================================================================




# --- BASICS (essential) ---

# Always-visible controls, stacked as four rows.
basics = widgets.VBox(children=[
    # row 1: uploader and output name
    widgets.HBox(
        children=[uploader, output_name_w],
        layout=widgets.Layout(justify_content='space-between'),
    ),
    # row 2: hours per day, real days, upstream concentration
    widgets.HBox(
        children=[hours_up_day_w, days_real_w, upstream_conc_w],
        layout=widgets.Layout(gap='12px'),
    ),
    # row 3: the two standard concentrations
    widgets.HBox(
        children=[std_conc_1_w, std_conc_2_w],
        layout=widgets.Layout(gap='12px'),
    ),
    # row 4: method selector (the same widget instance the code observes),
    # legend label and show-points toggle
    widgets.HBox(
        children=[method_w, legend_label_w, show_points_w],
        layout=widgets.Layout(gap='12px'),
    ),
])

# --- ADVANCED (collapsible) ---

# Section 0 of the accordion: fitting and smoothing parameters.
adv_fit = widgets.VBox(children=[
    widgets.HTML("<b>Fitting & smoothing</b>"),
    widgets.HBox(children=[use_density_w, density_bandwidth_w, poly_deg_w, loess_frac_w]),
    widgets.HBox(children=[sg_window_w, sg_poly_w, ma_frac_w]),
    widgets.HBox(children=[start_flat_frac_w, start_flat_strength_w, alpha_w]),
    widgets.HBox(children=[monotone_w, clip_min_w, clip_max_w]),
    widgets.HBox(children=[anchors_w, force_exact_w]),
])

# Section 1 of the accordion: colours, Y limits and time-panel smoothing.
adv_plot = widgets.VBox(children=[
    widgets.HTML("<b>Plot options</b>"),
    widgets.HBox(children=[raw_color_w, trend_color_w, anchor_color_w]),
    widgets.HBox(children=[ymin_w, ymax_w]),
    widgets.HBox(children=[ts_sg_window_w, ts_ma_frac_w]),
])

# Accordion holding both sections; titles are assigned by section index.
advanced = widgets.Accordion(children=[adv_fit, adv_plot])
advanced.set_title(0, "Fitting & smoothing")
advanced.set_title(1, "Plot & time controls")




# --- HELP (English) ---

# Static help text rendered inside a single-section accordion.
# NOTE(review): the "Target dose line" entry has no matching control in the
# layout code of this file section — confirm the help text is still current.
help_html = widgets.HTML(value="""

<ul style='margin:0 0 0 16px;padding:0'>

  <li><b>Method</b>: algorithm for the dose → efficiency curve. <i>Sigmoid (refined)</i> is a bounded logistic fit with density-based weighting.</li>

  <li><b>Use density weighting</b>: gives more weight to regions with more points (slider controls the adaptive binning bandwidth).</li>

  <li><b>Anchors</b>: local constraints as "x,y; x2,y2". We apply a smooth correction around anchors while preserving monotonicity.</li>

  <li><b>Target dose line</b>: horizontal line in the time panel to compare against cumulative dose.</li>

</ul>

""")

help_acc = widgets.Accordion(children=(help_html,))
help_acc.set_title(0, "What do the controls do?")



# --- HEADER + RUN + LOG ---

# Run button next to the auto-run toggle.
run_row = widgets.HBox(children=[run_btn, auto_run_w])

# NOTE(review): log_title is created here but is not placed in the layout below.
log_title = widgets.HTML("<hr><b>Run log</b>")

# Single-tab container: controls, help, a separator, the run row and the output box.
ui = widgets.Tab(children=[
    widgets.VBox(children=[
        basics,
        advanced,
        help_acc,
        widgets.HTML("<hr>"),
        run_row,
        out_box,
    ]),
])
ui.set_title(0, "Efficiency & Dosage Analyzer")

display(ui)


Tab(children=(VBox(children=(VBox(children=(HBox(children=(FileUpload(value=(), accept='.csv,.txt,.xlsx', desc…