In [1]:
import sys
import os
from scipy.optimize import curve_fit

# Resolve the project root as the parent of the current working directory
# (one level above notebooks/ when the kernel is started from that folder).
project_root = os.path.abspath("..")

# Register it only once: an unconditional append would add a duplicate
# sys.path entry every time this cell is re-executed in the same kernel.
if project_root not in sys.path:
    sys.path.append(project_root)

print("Added to Python path:", project_root)

import h5py
import numpy as np
import pandas as pd
# from src.parameters.tail_features import compute_LQ80


Added to Python path: c:\Users\YooNi\OneDrive\Desktop\Majorana-Neutrino-Hunt


In [2]:
# train_file = "../data/old/MJD_Train_0.hdf5"

# with h5py.File(train_file, "r") as f:
#     waveforms = np.array(f["raw_waveform"])
#     ids = np.array(f["id"])

# print("Loaded", len(waveforms), "waveforms")

In [3]:
def estimate_baseline(y, n_samples=200):
    """
    Estimate the baseline of a trace from its leading samples.

    Parameters
    ----------
    y : array-like
        Samples of the trace; converted to a float array.
    n_samples : int, optional
        Size of the leading window. Slicing is used, so a trace shorter
        than n_samples simply contributes all of its samples.

    Returns
    -------
    tuple of (float, float)
        Mean and standard deviation (NumPy default, ddof=0) of the window.
    """
    head = np.asarray(y, dtype=float)[:n_samples]
    baseline_mean = float(head.mean())
    baseline_std = float(head.std())
    return baseline_mean, baseline_std


In [4]:
def compute_ND80(waveform, n_pre=200):
    """
    ND80: Normalized maximum dip below the 80 percent amplitude level
    between the first 80 percent crossing and the peak.

    The baseline is the mean of the first n_pre samples, the peak is the
    global maximum (first occurrence), and the amplitude is peak minus
    baseline. Between the first sample that reaches
    baseline + 0.8 * amplitude and the peak, the largest excursion below
    that level is measured and divided by the amplitude.

    Parameters
    ----------
    waveform : array-like
        Samples of a single trace; converted to a float array.
    n_pre : int, optional
        Number of leading samples passed to estimate_baseline.

    Returns
    -------
    float
        The normalized dip (0.0 when the trace never falls back below the
        80 percent level before the peak, or when the first crossing is the
        peak itself). NaN when the value is undefined: empty input, a
        non-positive amplitude, or a non-finite amplitude (NaN/inf samples).
    """
    y = np.asarray(waveform, dtype=float)

    # np.argmax raises ValueError on an empty array; treat an empty trace
    # like every other "undefined" case and return NaN instead of crashing.
    if y.size == 0:
        return np.nan

    baseline, _ = estimate_baseline(y, n_samples=n_pre)
    peak_idx = int(np.argmax(y))
    amp = float(y[peak_idx]) - baseline

    # A NaN amplitude (NaN samples) or an infinite one (inf samples) cannot
    # be normalized meaningfully; without this check an inf peak would be
    # reported as a valid-looking 0.0.
    if not np.isfinite(amp) or amp <= 0:
        return np.nan

    level80 = baseline + 0.80 * amp

    # Indices of every sample at or above the 80 percent level; the first
    # one marks the start of the search window.
    above = np.flatnonzero(y >= level80)
    if above.size == 0:
        # Defensive: with a finite positive amplitude the peak itself should
        # qualify, so this is only reachable through floating-point rounding.
        return np.nan

    i80 = int(above[0])
    if i80 >= peak_idx:
        # The first crossing is the peak: no room for a dip in between.
        return 0.0

    seg = y[i80: peak_idx + 1]

    # Depth below the 80 percent level, with samples above it counting as 0.
    depth_abs = float(np.max(np.maximum(level80 - seg, 0.0)))

    # amp is known to be finite and > 0 here, so the division is safe.
    return depth_abs / amp


In [None]:
# Folder (relative to the working directory) that receives the CSV files
# written by the cells below.
OUTPUT_DIR = "finalcsveunice"

# Create it up front; exist_ok=True keeps this cell safe to re-run.
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

In [None]:
# ND80 train
# Parallel accumulators across all training files: one id string and one
# ND80 value per waveform, appended in lockstep.
all_train_ids_nd80, all_ND80_train = [], []

for train_idx in range(16):
    train_file = f"../data/old/MJD_Train_{train_idx}.hdf5"

    # A missing file is reported and skipped, not treated as an error.
    if not os.path.exists(train_file):
        print(f"Skipping missing file: {train_file}")
        continue

    print(f"\nLoading {train_file}")

    # Copy both datasets into memory so the file can be closed immediately.
    with h5py.File(train_file, "r") as f:
        waveforms_train = np.array(f["raw_waveform"])
        ids_train = np.array(f["id"])

    print(f"  Waveforms: {len(waveforms_train)}")

    for i, wf in enumerate(waveforms_train):
        # Progress line every 5000 waveforms (including the very first one).
        if not i % 5000:
            print(f"    ND80 Train_{train_idx} {i}/{len(waveforms_train)}")

        all_ND80_train.append(compute_ND80(wf))
        # Tag the per-file id with the split name and the file index.
        # NOTE(review): if the ids are stored as bytes, the f-string embeds
        # their b'...' repr - confirm the dataset dtype.
        all_train_ids_nd80.append(f"{ids_train[i]}_train_{train_idx}")

# Convert to a float array and map every non-finite entry (+/-inf) to NaN.
all_ND80_train = np.array(all_ND80_train, dtype=float)
all_ND80_train = np.where(np.isfinite(all_ND80_train), all_ND80_train, np.nan)

df_nd80_train = pd.DataFrame({"id": all_train_ids_nd80, "ND80": all_ND80_train})

# Write one combined CSV for all training files, without the index column.
output_path_nd80_train = os.path.join(OUTPUT_DIR, "ND80_train_all.csv")
df_nd80_train.to_csv(output_path_nd80_train, index=False)

# Quick sanity summary: first rows, distribution, and NaN count.
print("\nSaved combined ND80 TRAIN CSV to:", output_path_nd80_train)
print(df_nd80_train.head())
print(df_nd80_train["ND80"].describe())
print("NaNs:", df_nd80_train["ND80"].isna().sum())


In [None]:
# ND80 test
# Parallel accumulators across all test files: one id string and one
# ND80 value per waveform, appended in lockstep.
all_test_ids_nd80, all_ND80_test = [], []

for test_idx in range(6):
    test_file = f"../data/old/MJD_Test_{test_idx}.hdf5"

    # A missing file is reported and skipped, not treated as an error.
    if not os.path.exists(test_file):
        print(f"Skipping missing file: {test_file}")
        continue

    print(f"\nLoading {test_file}")

    # Copy both datasets into memory so the file can be closed immediately.
    with h5py.File(test_file, "r") as f:
        waveforms_test = np.array(f["raw_waveform"])
        ids_test = np.array(f["id"])

    print(f"  Waveforms: {len(waveforms_test)}")

    for i, wf in enumerate(waveforms_test):
        # Progress line every 5000 waveforms (including the very first one).
        if not i % 5000:
            print(f"    ND80 Test_{test_idx} {i}/{len(waveforms_test)}")

        all_ND80_test.append(compute_ND80(wf))
        # Tag the per-file id with the split name and the file index.
        # NOTE(review): if the ids are stored as bytes, the f-string embeds
        # their b'...' repr - confirm the dataset dtype.
        all_test_ids_nd80.append(f"{ids_test[i]}_test_{test_idx}")

# Convert to a float array and map every non-finite entry (+/-inf) to NaN.
all_ND80_test = np.array(all_ND80_test, dtype=float)
all_ND80_test = np.where(np.isfinite(all_ND80_test), all_ND80_test, np.nan)

df_nd80_test = pd.DataFrame({"id": all_test_ids_nd80, "ND80": all_ND80_test})

# Write one combined CSV for all test files, without the index column.
output_path_nd80_test = os.path.join(OUTPUT_DIR, "ND80_test_all.csv")
df_nd80_test.to_csv(output_path_nd80_test, index=False)

# Quick sanity summary: first rows, distribution, and NaN count.
print("\nSaved combined ND80 TEST CSV to:", output_path_nd80_test)
print(df_nd80_test.head())
print(df_nd80_test["ND80"].describe())
print("NaNs:", df_nd80_test["ND80"].isna().sum())
