In [9]:
import os
import glob
import h5py
import numpy as np
import pandas as pd
def compute_tdrift99_single(wf, tp0, frac=0.999):
    """Find where a waveform first reaches a fraction of its post-onset maximum.

    Parameters
    ----------
    wf : numpy.ndarray
        One-dimensional waveform samples.
    tp0 : number
        Onset position; truncated to an integer sample index with ``int``.
    frac : float, optional
        Fraction of the maximum of ``wf[int(tp0):]`` used as the threshold.

    Returns
    -------
    float
        Number of samples between ``int(tp0)`` and the first sample at or
        above ``frac * max(wf[int(tp0):])``.  ``np.nan`` when fewer than two
        samples remain at/after the onset, when that maximum is not positive,
        or when no sample reaches the threshold.

    Notes
    -----
    The threshold is taken relative to the raw sample values; no baseline is
    subtracted inside this function.
    """
    onset = int(tp0)
    # At least two samples are required at or after the onset index.
    if onset >= len(wf) - 1:
        return np.nan

    tail = wf[onset:]
    peak = tail.max()
    if peak <= 0:
        return np.nan

    # Indices (relative to the onset) of every sample at or above threshold.
    crossings = np.flatnonzero(tail >= frac * peak)
    return float(crossings[0]) if len(crossings) else np.nan
def pole_zero_correct(wf, tau_samples=500.0):
    """Run a single-pole recursive filter over a 1-D waveform.

    The output follows ``y[0] = 0`` and, for ``i >= 1``,
    ``y[i] = (x[i] - x[i-1]) + alpha * y[i-1]`` with
    ``alpha = exp(-1 / tau_samples)``.

    Parameters
    ----------
    wf : array_like
        One-dimensional waveform samples (any numeric dtype).
    tau_samples : float, optional
        Decay constant of the recursion, expressed in samples.

    Returns
    -------
    numpy.ndarray
        float64 array with the same length as ``wf``.  Inputs with fewer than
        two samples yield an all-zero array of matching length instead of
        raising.

    Notes
    -----
    NOTE(review): the recursion is a first difference followed by an
    exponential decay; confirm that this is the intended form of the
    pole-zero correction for this detector chain.
    """
    alpha = float(np.exp(-1.0 / tau_samples))
    # Convert once so every difference is evaluated in float64, whatever the
    # storage dtype of the waveform is.
    x = np.asarray(wf, dtype=np.float64)
    y = np.zeros_like(x)
    if x.size < 2:
        return y

    # Only the feedback term is inherently sequential; the first differences
    # are computed in one vectorised call.
    acc = 0.0
    for i, step in enumerate(np.diff(x).tolist(), start=1):
        acc = step + alpha * acc
        y[i] = acc
    return y
def compute_tfr_single(wf, tp0, tail_offset=200, tail_len=600):
    """Ratio of tail scatter before and after ``pole_zero_correct``.

    The tail window starts ``tail_offset`` samples after ``int(tp0)`` and
    spans at most ``tail_len`` samples (clipped to the end of ``wf``).

    Parameters
    ----------
    wf : numpy.ndarray
        One-dimensional waveform samples.
    tp0 : number
        Onset position; truncated to an integer sample index with ``int``.
    tail_offset : int, optional
        Distance in samples from the onset to the start of the tail window.
    tail_len : int, optional
        Maximum length of the tail window in samples.

    Returns
    -------
    float
        ``std(raw tail) / std(filtered tail)``.  ``np.nan`` when the window
        would start within the last ten samples of ``wf`` (or beyond), or
        when the filtered tail has a non-positive standard deviation.
    """
    n_samples = len(wf)
    first = int(tp0) + tail_offset
    if first >= n_samples - 10:
        return np.nan
    window = slice(first, min(n_samples, first + tail_len))

    raw_sigma = np.std(wf[window].astype(np.float64))
    # The filter is applied to the full waveform; only the tail is compared.
    pz_sigma = np.std(pole_zero_correct(wf)[window])
    if pz_sigma <= 0:
        return np.nan
    return float(raw_sigma / pz_sigma)
def smooth_gaussian(x, sigma=2.0):
    """Smooth a 1-D signal with a truncated, normalised Gaussian kernel.

    The kernel covers ``int(3 * sigma)`` samples on each side of its centre
    and the signal is extended at both ends by repeating its edge values, so
    the output always has the same length as the input.

    Parameters
    ----------
    x : array_like
        One-dimensional signal.
    sigma : float, optional
        Gaussian width in samples.  ``sigma <= 0`` disables smoothing.

    Returns
    -------
    numpy.ndarray
        float64 array with the same length as ``x``.  When smoothing is
        disabled, when the kernel radius truncates to zero (``sigma < 1/3``)
        or when ``x`` is empty, a float64 copy of ``x`` is returned.
    """
    values = np.array(x, dtype=np.float64)  # always a fresh float64 copy
    if sigma <= 0:
        return values

    radius = int(3 * sigma)
    # A zero radius means a one-tap kernel equal to [1.0], i.e. the identity.
    # It must be short-circuited: slicing with ``[radius:-radius]`` would
    # otherwise produce an empty array.  Empty input has nothing to smooth.
    if radius == 0 or values.size == 0:
        return values

    idx = np.arange(-radius, radius + 1, dtype=np.float64)
    kernel = np.exp(-0.5 * (idx / sigma) ** 2)
    kernel /= kernel.sum()

    padded = np.pad(values, radius, mode="edge")
    # "valid" on the edge-padded signal returns exactly len(x) samples and is
    # the same set of dot products as the centre of a "same" convolution.
    return np.convolve(padded, kernel, mode="valid")
def compute_peak_count_single(wf, tp0, window_after_tp0=400, grad_threshold_frac=0.05, min_separation=5):
    """Count separated positive-slope maxima in a window around the onset.

    The waveform is baseline-subtracted (mean of its first 200 samples, or
    fewer if it is shorter), scaled by its largest absolute value, cut to the
    window ``[int(tp0) - 10, int(tp0) + window_after_tp0)`` (clipped to the
    waveform), smoothed with ``smooth_gaussian(sigma=2.0)`` and
    differentiated with ``np.gradient``.  Strict local maxima of that
    gradient which reach the threshold are counted greedily from left to
    right, skipping any candidate closer than ``min_separation`` samples to
    the previously accepted one.

    Parameters
    ----------
    wf : numpy.ndarray
        One-dimensional waveform samples.
    tp0 : number
        Onset position; truncated to an integer sample index with ``int``.
    window_after_tp0 : int, optional
        Number of samples after the onset included in the window.
    grad_threshold_frac : float, optional
        Threshold as a fraction of the largest absolute gradient in the window.
    min_separation : int, optional
        Minimum index distance between two counted maxima.

    Returns
    -------
    int
        Number of counted maxima.  0 for an empty waveform, a flat waveform,
        a window shorter than three samples (no interior point can be a local
        maximum) or a window whose gradient is zero everywhere.
    """
    n = len(wf)
    if n == 0:
        return 0
    tp0 = int(tp0)

    baseline = float(np.mean(wf[:min(200, n)]))
    wf_bs = wf - baseline
    max_val = np.max(np.abs(wf_bs))
    if max_val <= 0:
        return 0
    wf_norm = wf_bs / max_val

    start = max(tp0 - 10, 0)
    end = min(tp0 + window_after_tp0, n)
    segment = wf_norm[start:end]
    # An onset at or beyond the end of the waveform leaves an empty or
    # one-sample window, on which padding/gradient would raise.  Windows of
    # fewer than three samples have no interior point, so the count is 0.
    if len(segment) < 3:
        return 0

    grad = np.gradient(smooth_gaussian(segment, sigma=2.0))
    gmax = np.max(np.abs(grad))
    if gmax <= 0:
        return 0
    threshold = grad_threshold_frac * gmax

    # Strict local maxima of the gradient that also reach the threshold.
    interior = grad[1:-1]
    is_peak = (interior > grad[:-2]) & (interior > grad[2:]) & (interior >= threshold)
    candidates = np.flatnonzero(is_peak) + 1  # shift back to indices of ``grad``

    # Greedy left-to-right selection enforcing the minimum separation.
    count = 0
    last_peak_idx = -min_separation - 1
    for idx in candidates.tolist():
        if idx - last_peak_idx >= min_separation:
            count += 1
            last_peak_idx = idx
    return int(count)
def compute_gradient_baseline_noise_single(wf, baseline_region=(0, 200)):
    """Root-mean-square of the sample-to-sample gradient in a baseline window.

    Parameters
    ----------
    wf : numpy.ndarray
        One-dimensional waveform samples.
    baseline_region : tuple of (int, int), optional
        ``(start, end)`` sample indices of the window; ``start`` is clipped
        to 0 and ``end`` to ``len(wf)``.

    Returns
    -------
    float
        ``sqrt(mean(np.gradient(window) ** 2))``, or ``np.nan`` when the
        clipped window holds fewer than five samples.
    """
    lo, hi = baseline_region
    lo, hi = max(lo, 0), min(hi, len(wf))
    if hi - lo < 5:
        return np.nan

    slope = np.gradient(wf[lo:hi].astype(np.float64))
    return float(np.sqrt(np.mean(np.square(slope))))
def compute_band_power_ratio_single(wf, fs=100e6, low_band=(0.1e6, 1e6), high_band=(1e6, 10e6)):
    """Ratio of spectral power in a high band to that in a low band.

    The mean of the waveform is removed, the one-sided FFT is taken and the
    squared magnitudes are summed over each half-open frequency band
    ``[band[0], band[1])``.

    Parameters
    ----------
    wf : numpy.ndarray
        One-dimensional waveform samples.
    fs : float, optional
        Sampling frequency used to label the FFT bins.
    low_band, high_band : tuple of (float, float), optional
        Band edges, in the same unit as ``fs``.

    Returns
    -------
    float
        ``power(high_band) / power(low_band)``, or ``np.nan`` when the
        low-band power is not positive (including when no FFT bin falls
        inside the low band).
    """
    centered = wf.astype(np.float64)
    centered -= np.mean(centered)

    power = np.abs(np.fft.rfft(centered)) ** 2
    bin_freqs = np.fft.rfftfreq(len(centered), d=1.0 / fs)

    def _band_power(band):
        # Sum of squared FFT magnitudes over bins with band[0] <= f < band[1].
        in_band = (bin_freqs >= band[0]) & (bin_freqs < band[1])
        return power[in_band].sum()

    low_power = _band_power(low_band)
    high_power = _band_power(high_band)
    if low_power <= 0:
        return np.nan
    return float(high_power / low_power)
def process_hdf5_file(h5_path, out_dir):
    """Extract per-waveform parameters from one HDF5 file and save them as CSV.

    Reads the ``raw_waveform``, ``tp0`` and ``id`` datasets from ``h5_path``,
    evaluates every parameter function on each waveform and writes one row
    per event to ``<out_dir>/<input stem>_myparams.csv``.  ``out_dir`` is
    created if it does not exist.  Progress is printed every 5000 events.

    Parameters
    ----------
    h5_path : str
        Path of the input HDF5 file.
    out_dir : str
        Directory that receives the CSV file.
    """
    print(f"Processing {h5_path}...")
    basename = os.path.basename(h5_path)

    # Load everything into memory so the file can be closed before the loop.
    with h5py.File(h5_path, "r") as h5:
        waveforms = h5["raw_waveform"][:]
        tp0 = h5["tp0"][:]
        ids = h5["id"][:]

    n_events = waveforms.shape[0]
    print(f"  Found {n_events} waveforms.")

    # Output column name -> callable(waveform, onset), in CSV column order.
    extractors = (
        ("tdrift10", lambda wf, t0: compute_tdrift99_single(wf, t0, frac=0.10)),
        ("tdrift50", lambda wf, t0: compute_tdrift99_single(wf, t0, frac=0.50)),
        ("tdrift99", lambda wf, t0: compute_tdrift99_single(wf, t0)),
        ("tfr", lambda wf, t0: compute_tfr_single(wf, t0)),
        ("peak_count", lambda wf, t0: compute_peak_count_single(wf, t0)),
        ("gbn", lambda wf, t0: compute_gradient_baseline_noise_single(wf)),
        ("bpr", lambda wf, t0: compute_band_power_ratio_single(wf)),
    )
    columns = {name: [] for name, _ in extractors}

    for i in range(n_events):
        wf, t0 = waveforms[i], tp0[i]
        for name, extract in extractors:
            columns[name].append(extract(wf, t0))
        done = i + 1
        if done % 5000 == 0:
            print(f"    Processed {done}/{n_events} events...")

    df = pd.DataFrame({"id": ids, "file": basename, **columns})

    os.makedirs(out_dir, exist_ok=True)
    stem = os.path.splitext(basename)[0]
    out_path = os.path.join(out_dir, stem + "_myparams.csv")
    df.to_csv(out_path, index=False)
    print(f"  Saved CSV to {out_path}\n")
def main():
    """Process every ``MJD_NPML*.hdf5`` file found in ``../../data``.

    The data directory is resolved relative to the current working
    directory; the resulting CSV files go to its ``params_npml``
    sub-directory.  Files are handled in sorted path order.
    """
    data_dir = os.path.abspath("../../data")
    out_dir = os.path.join(data_dir, "params_npml")

    npml_files = sorted(glob.glob(os.path.join(data_dir, "MJD_NPML*.hdf5")))
    print("NPML files:", npml_files)

    for h5_path in npml_files:
        process_hdf5_file(h5_path, out_dir)


# Run the extraction when this cell/script is executed directly.
if __name__ == "__main__":
    main()


NPML files: ['/Users/prithvikochhar/Documents/GitHub/Majorana-Neutrino-Hunt/data/MJD_NPML_0.hdf5', '/Users/prithvikochhar/Documents/GitHub/Majorana-Neutrino-Hunt/data/MJD_NPML_1.hdf5', '/Users/prithvikochhar/Documents/GitHub/Majorana-Neutrino-Hunt/data/MJD_NPML_2.hdf5']
Processing /Users/prithvikochhar/Documents/GitHub/Majorana-Neutrino-Hunt/data/MJD_NPML_0.hdf5...
  Found 65000 waveforms.
    Processed 5000/65000 events...
    Processed 10000/65000 events...
    Processed 15000/65000 events...
    Processed 20000/65000 events...
    Processed 25000/65000 events...
    Processed 30000/65000 events...
    Processed 35000/65000 events...
    Processed 40000/65000 events...
    Processed 45000/65000 events...
    Processed 50000/65000 events...
    Processed 55000/65000 events...
    Processed 60000/65000 events...
    Processed 65000/65000 events...
  Saved CSV to /Users/prithvikochhar/Documents/GitHub/Majorana-Neutrino-Hunt/data/params_npml/MJD_NPML_0_myparams.csv

Processing /Users/pri

In [3]:
import os

import pandas as pd

# Read the per-file parameter tables from the relative data directory that
# the extraction step writes to (../../data/params_npml with respect to the
# notebook's working directory) instead of absolute, user-specific paths, so
# the notebook also runs on other machines and checkouts.
PARAMS_DIR = os.path.abspath(os.path.join("..", "..", "data", "params_npml"))
file_list = [
    os.path.join(PARAMS_DIR, f"MJD_NPML_{index}_myparams.csv")
    for index in range(3)
]

# One DataFrame per input file, stacked into a single table with a fresh
# 0..N-1 index.
df_list = [pd.read_csv(csv_path) for csv_path in file_list]
combined_df = pd.concat(df_list, ignore_index=True)

In [5]:
# Write the combined parameter table as a gzip-compressed CSV (no index
# column) into the current working directory.
output_file = "prithvi_combined_npml.csv.gz"
combined_df.to_csv(output_file, index=False, compression="gzip")