# ABF file : Plasmid data experiment

## Screening_sample

In [None]:
# Screening settings for the ABF plasmid recordings.
# Keyword construction yields an ordinary dict with the keys in the order
# written below.
CONFIG = dict(
    # === Input discovery ===
    # Root folder that holds the per-sample directories (sample1/, sample2/, ...).
    ROOT_PATH="/Users/hugo/New data/PacBio",
    # Glob patterns for recordings; both .dat and .abf files are picked up.
    FILE_GLOB=["*.dat", "*.abf"],
    # Skip work already recorded in processing_status.json.
    SKIP_ALREADY_PROCESSED=True,

    # === Header parsing (applies to .dat files only) ===
    HEADER_SAMPLERATE_DIV=1,

    # === Baseline estimation ===
    # 1 kHz low-pass for normalization, applied before segmentation
    # (value taken from Alexander's pipeline).
    BASELINE_LP_HZ=1000.0,
    BASELINE_ORDER=4,

    # === Segmentation ===
    # Normalized-gradient threshold and minimum segment length, both matching
    # Alexander's settings.
    SEG_GRAD_THRESH=0.3,
    MIN_SEG_LEN=50_000,
    # Segments whose normalized variance exceeds this are rejected.
    # NOTE(review): elsewhere this key is described as a std/median
    # threshold - confirm which statistic the pipeline actually computes.
    SEG_VAR_MAX=0.005,
    # None = no additional smoothing ahead of segmentation.
    SEGMENT_LP_HZ=None,

    # === Feature extraction ===
    # "ratio" mode: feature = 1 - I/I_norm (as in Alexander's pipeline).
    FEATURE_MODE="ratio",
    EPS_DENOM=1e-12,

    # === Low-pass filter applied to the feature signal ===
    FEATURE_LP_ENABLE=True,
    # "fixed_hz" selects an absolute cutoff rather than a fraction of fs.
    FEATURE_LP_MODE="fixed_hz",
    # 500 kHz cutoff.
    # NOTE(review): a digital low-pass needs cutoff < fs/2, so this assumes
    # recordings sampled above 1 MHz - confirm against the data.
    FEATURE_LP_FIXED_HZ=500_000.0,
    # Only consulted when FEATURE_LP_MODE == "frac_fs".
    FEATURE_LP_FRAC=0.2,
    FEATURE_LP_ORDER=6,

    # === Peak detection ===
    # Extraction window, in samples.
    WINDOW_SIZE=2500,
    # Keeps only the stronger peaks.
    PEAK_PROMINENCE=0.2,
    # Minimum peak spacing, in samples.
    PEAK_MIN_DISTANCE=1000,
    # Compute the SNR and include it in the output.
    SNR_REPORT=True,

    # === Plotting / debug output ===
    # Downsampling factor used for background.png.
    BG_PLOT_DOWNSAMPLE=1000,
    # Downsampling factor used for the kept/rejected segment plots.
    SEG_PLOT_DOWNSAMPLE=100,
    SAVE_SEGMENT_DEBUG_WHEN_REJECTED=True,
    SAVE_SEGMENT_DEBUG_WHEN_ACCEPTED=True,

    # === Numeric safety guards ===
    MIN_BASELINE_MEDIAN=1e-18,
    MIN_WINDOW_SAMPLES=1000,
    # NOTE(review): PEAK_MIN_DISTANCE (1000) is below WINDOW_SIZE (2500);
    # presumably this flag makes the pipeline raise the effective distance -
    # verify in the peak-detection code.
    ENFORCE_DISTANCE_GTE_WINDOW=True,
)

## Process_combined_peaks

In [None]:
# Peak-fitting settings for the ABF plasmid recordings.
# Keyword construction yields an ordinary dict with the keys in the order
# written below.
CONFIG = dict(
    # === Input / output layout ===
    ROOT_PATH="/Users/hugo/New data/PacBio",
    INPUT_JSON_NAME="combined_peaks_data.json",
    RESULTS_SUBDIR="peak_fits",
    HIST_SUBDIR="histograms",
    # A folder is skipped when it already contains fit_results.json.
    SKIP_IF_FIT_EXISTS=True,

    # === Per-peak plots ===
    SAVE_PLOTS=True,
    FIGSIZE=(10, 6),
    FIG_DPI=150,
    PLOT_RAW=True,
    PLOT_FILTERED=True,
    PLOT_FITTED=True,
    XLABEL_TIME="Time (µs)",

    # === Histograms and summary statistics ===
    FWHM_HIST_BINS=50,
    AREA_HIST_BINS=300,
    # Use None to let the axis limits be chosen automatically.
    AREA_XLIM=(0, 20.5),
    SAVE_STATS_TXT=True,
    SAVE_CORR=True,

    # === Optional post-fit filters (affect stats/plots only) ===
    # None leaves the corresponding bound unset.
    MIN_FWHM=None,               # example: 2.0 (µs)
    MAX_FWHM=None,               # example: 30.0
    MIN_AREA=None,               # example: 0.02
    MAX_AREA=None,
    MIN_MAX_DISPLACEMENT=None,   # example: 0.01
    MAX_MAX_DISPLACEMENT=None,

    # === Fit model and robustness ===
    # One of "gaussian", "supergauss", "skewgauss".
    FIT_MODEL="gaussian",
    # Read only when FIT_MODEL == "supergauss"; p == 2 is a plain Gaussian.
    SUPER_GAUSS_P=2,
    # One of "linear", "soft_l1", "huber", "cauchy".
    ROBUST_LOSS="linear",
    ROBUST_F_SCALE=1.0,

    # === Parameter bounds (stated unit: µs) ===
    # NOTE(review): the AMP_* bounds are presumably in feature-signal units
    # rather than µs - confirm.
    AMP_MIN=0.05,
    AMP_MAX=0.9,
    # NOTE(review): a lower bound of exactly 0 admits a zero-width peak;
    # confirm the fit routine tolerates sigma == 0.
    SIGMA_MIN=0.0,
    SIGMA_MAX=10.0,
    MU_PAD_US=20.0,

    # === Optional re-windowing around the maximum before fitting ===
    REFIT_USE_CENTERED_WINDOW=True,
    REFIT_HALF_WINDOW_US=50,
)

# DAT files 

## Screening_sample

In [None]:
# Screening settings for the DAT recordings.
# Keyword construction yields an ordinary dict with the keys in the order
# written below.
CONFIG = dict(
    # === Input discovery ===
    # Root folder that holds the per-sample directories (sample1/, sample2/, ...).
    ROOT_PATH="/Users/hugo/New data/PacBio",
    # A list of glob patterns (formerly a single string).
    FILE_GLOB=["*.dat", "*.abf"],
    # Skip work already recorded in processing_status.json.
    SKIP_ALREADY_PROCESSED=True,

    # === Header parsing (sample-rate units) ===
    # Only the .dat Axopatch loader reads this.
    HEADER_SAMPLERATE_DIV=1,

    # === Baseline (background) estimation ===
    BASELINE_LP_HZ=30_000.0,
    BASELINE_ORDER=4,

    # Extra smoothing used solely for segmentation; None turns it off
    # (an example value would be 1000.0).
    SEGMENT_LP_HZ=None,

    # === Segmentation (detecting baseline jumps) ===
    SEG_GRAD_THRESH=0.3,
    MIN_SEG_LEN=30_000,
    # std/median threshold a segment must satisfy to be kept.
    SEG_VAR_MAX=0.004,

    # === Feature signal used for peak detection ===
    # "ratio": 1 - I/I_norm  (prefers a stable, nonzero baseline)
    # "diff":  I - I_norm    (for baselines whose magnitude drifts a lot)
    FEATURE_MODE="ratio",
    EPS_DENOM=1e-12,

    # === Optional low-pass on the feature ===
    FEATURE_LP_ENABLE=True,
    # Either "frac_fs" or "fixed_hz".
    FEATURE_LP_MODE="frac_fs",
    FEATURE_LP_FRAC=0.2,
    FEATURE_LP_FIXED_HZ=5000.0,
    FEATURE_LP_ORDER=6,

    # === Peak picking / window extraction ===
    # Half-window, in samples.
    WINDOW_SIZE=2500,
    PEAK_PROMINENCE=0.1,
    # Minimum distance between peaks, in samples.
    PEAK_MIN_DISTANCE=500,
    SNR_REPORT=True,

    # === Plotting / debug output ===
    BG_PLOT_DOWNSAMPLE=1000,
    SEG_PLOT_DOWNSAMPLE=100,
    SAVE_SEGMENT_DEBUG_WHEN_REJECTED=True,
    SAVE_SEGMENT_DEBUG_WHEN_ACCEPTED=True,

    # === Numeric safety guards ===
    MIN_BASELINE_MEDIAN=1e-18,
    MIN_WINDOW_SAMPLES=1000,
    # NOTE(review): PEAK_MIN_DISTANCE (500) is below WINDOW_SIZE (2500);
    # presumably this flag makes the pipeline raise the effective distance -
    # verify in the peak-detection code.
    ENFORCE_DISTANCE_GTE_WINDOW=True,
)


## Process_combined_peaks

In [None]:
# Peak-fitting settings for the DAT recordings.
# Keyword construction yields an ordinary dict with the keys in the order
# written below.
CONFIG = dict(
    # === Input / output layout ===
    ROOT_PATH="/Users/hugo/New data/PacBio",
    INPUT_JSON_NAME="combined_peaks_data.json",
    RESULTS_SUBDIR="peak_fits",
    HIST_SUBDIR="histograms",
    # A folder is skipped when it already contains fit_results.json.
    SKIP_IF_FIT_EXISTS=True,

    # === Per-peak plots ===
    SAVE_PLOTS=True,
    FIGSIZE=(10, 6),
    FIG_DPI=150,
    PLOT_RAW=True,
    PLOT_FILTERED=True,
    PLOT_FITTED=True,
    XLABEL_TIME="Time (µs)",

    # === Histograms and summary statistics ===
    FWHM_HIST_BINS=50,
    AREA_HIST_BINS=300,
    # Use None to let the axis limits be chosen automatically.
    AREA_XLIM=(0, 20.5),
    SAVE_STATS_TXT=True,
    SAVE_CORR=True,

    # === Optional post-fit filters (affect stats/plots only) ===
    # None leaves the corresponding bound unset.
    MIN_FWHM=None,               # example: 2.0 (µs)
    MAX_FWHM=None,               # example: 30.0
    MIN_AREA=None,               # example: 0.02
    MAX_AREA=None,
    MIN_MAX_DISPLACEMENT=None,   # example: 0.01
    MAX_MAX_DISPLACEMENT=None,

    # === Fit model and robustness ===
    # One of "gaussian", "supergauss", "skewgauss".
    FIT_MODEL="supergauss",
    # Read only when FIT_MODEL == "supergauss"; p == 2 is a plain Gaussian.
    SUPER_GAUSS_P=6,
    # One of "linear", "soft_l1", "huber", "cauchy".
    ROBUST_LOSS="linear",
    ROBUST_F_SCALE=1.0,

    # === Parameter bounds (stated unit: µs) ===
    # NOTE(review): the AMP_* bounds are presumably in feature-signal units
    # rather than µs - confirm.
    AMP_MIN=0.0001,
    AMP_MAX=50.0,
    SIGMA_MIN=0.002,
    SIGMA_MAX=0.3,
    # The fitted centre (mu) may move this far (+/-) beyond the window.
    MU_PAD_US=0.02,

    # === Optional re-windowing around the maximum before fitting ===
    REFIT_USE_CENTERED_WINDOW=True,
    REFIT_HALF_WINDOW_US=5,
)
