In [1]:
import json
import numpy as np
import pandas as pd
from typing import Dict, Any, Optional

# ============================================================
# Apnea-driven cutter (NO filtering, NO ratio logic)
# Cuts BOTH voltage (EIT) and PAP using apnea times from config
# and saves them to the provided output file paths.
# ============================================================
def cut_apnea_window_and_save(
    clinical_config_path: str,
    patient_id: str,
    voltage_in_csv_path: str,
    voltage_out_csv_path: str,
    pap_in_csv_path: str,
    pap_out_csv_path: str,
    *,
    eit_sr: float = 50.355,       # EIT sample rate (Hz)
    pap_sr: Optional[float] = None,  # PAP sample rate (Hz). If None, defaults to 1.0
) -> Dict[str, Any]:
    """
    Trim BOTH voltage (EIT) and PAP to [apnea_start_time, apnea_end_time) and save to output CSVs.

    The apnea times are read from ``cfg[patient_id]["events"]`` in the JSON config.
    They are converted to row indices as ``round(time * sample_rate)``, so row 0 of
    each input file is treated as t = 0. The window length in rows is derived from
    the duration ``(t1 - t0) * sample_rate`` and is clipped to the end of the file.

    - Voltage is read/written with NO header (shape preserved).
    - PAP is read with pandas' default header handling (first row becomes the
      column names) and written back with those column names.
    - Missing parent directories of both output paths are created.

    Args:
        clinical_config_path: JSON file holding per-patient ``events``.
        patient_id: Key of the patient inside the config.
        voltage_in_csv_path / voltage_out_csv_path: Headerless voltage CSV in/out.
        pap_in_csv_path / pap_out_csv_path: PAP CSV in/out.
        eit_sr: EIT sample rate in Hz.
        pap_sr: PAP sample rate in Hz; ``None`` means 1.0.

    Returns:
        A report dict with keys ``apnea_times``, ``sampling_rates``, ``indices``
        (start/end/len per stream), ``durations_seconds`` (actual per stream plus
        the target duration; NaN for a non-positive rate) and ``paths``.

    Raises:
        ValueError: If the patient has no ``apnea_start_time``/``apnea_end_time``
            (including a missing or null patient/``events`` entry), or if the end
            time precedes the start time.
    """
    import os  # imported locally: the notebook's import cell does not provide it

    # ---- Load apnea times from config ----
    with open(clinical_config_path, "r") as f:
        cfg = json.load(f)
    # `or {}` at both levels: a missing patient, a null patient entry and a null
    # "events" entry all end in the ValueError below rather than a TypeError.
    patient_cfg = cfg.get(patient_id) or {}
    events = patient_cfg.get("events") or {}
    if "apnea_start_time" not in events or "apnea_end_time" not in events:
        raise ValueError(f"Missing apnea_start_time/apnea_end_time for '{patient_id}' in clinical_config.json.")
    t0 = float(events["apnea_start_time"])
    t1 = float(events["apnea_end_time"])
    if t1 < t0:
        raise ValueError(f"apnea_end_time < apnea_start_time for '{patient_id}'.")

    # ---- Default PAP rate if not provided (1 Hz) ----
    if pap_sr is None:
        pap_sr = 1.0

    # ---- Load streams ----
    # Voltage (EIT): headerless numeric matrix (T, C)
    eit_df = pd.read_csv(voltage_in_csv_path, header=None)
    # PAP: first row is taken as the header
    pap_df = pd.read_csv(pap_in_csv_path)

    # ---- Convert [t0, t1) to indices for each stream ----
    def _window_indices(n_samples: int, sr: float, t0: float, t1: float):
        start = max(0, int(round(t0 * sr)))
        # Length comes from the duration, not from round(t1 * sr) - start, so
        # equal durations always give equal row counts for a given rate.
        target = max(0, int(round((t1 - t0) * sr)))
        end = min(n_samples, start + target)
        if end < start:
            # Window starts past the end of the recording: empty slice.
            end = start
        return start, end

    eit_start, eit_end = _window_indices(len(eit_df), eit_sr, t0, t1)
    pap_start, pap_end = _window_indices(len(pap_df), pap_sr, t0, t1)

    # ---- Slice the windows ----
    eit_trim_df = eit_df.iloc[eit_start:eit_end].copy()
    pap_trim_df = pap_df.iloc[pap_start:pap_end].copy()

    # ---- Save to the specified output paths ----
    # to_csv does not create folders, so make sure they exist first. A bare
    # filename has an empty dirname, which os.makedirs would reject.
    for out_path in (voltage_out_csv_path, pap_out_csv_path):
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # Voltage: no header, no index (pure numeric like input)
    eit_trim_df.to_csv(voltage_out_csv_path, header=False, index=False)
    # PAP: keep column names and no index
    pap_trim_df.to_csv(pap_out_csv_path, index=False)

    # ---- Prepare a concise report ----
    report = {
        "apnea_times": {"apnea_start_time": t0, "apnea_end_time": t1},
        "sampling_rates": {"eit_sr": eit_sr, "pap_sr": pap_sr},
        "indices": {
            "eit": {"start": eit_start, "end": eit_end, "len": int(eit_end - eit_start)},
            "pap": {"start": pap_start, "end": pap_end, "len": int(pap_end - pap_start)},
        },
        "durations_seconds": {
            "eit": (eit_end - eit_start) / eit_sr if eit_sr > 0 else float("nan"),
            "pap": (pap_end - pap_start) / pap_sr if pap_sr > 0 else float("nan"),
            "target": (t1 - t0),
        },
        "paths": {
            "voltage_in": voltage_in_csv_path,
            "voltage_out": voltage_out_csv_path,
            "pap_in": pap_in_csv_path,
            "pap_out": pap_out_csv_path,
            "config": clinical_config_path,
            "patient_id": patient_id,
        },
    }
    return report


def print_cut_summary(report: Dict[str, Any]) -> None:
    """
    Print a four-line summary of a cut report.

    Reads ``report["apnea_times"]``, ``report["indices"]`` (``eit`` and ``pap``
    entries with ``start``/``end``/``len``) and ``report["durations_seconds"]``
    (``eit``, ``pap``, ``target``). Times and durations are shown with three decimals.
    """
    eit_idx = report["indices"]["eit"]
    pap_idx = report["indices"]["pap"]
    seconds = report["durations_seconds"]
    window = report["apnea_times"]

    # One string per printed line; each carries its own trailing newline
    # except the last, which gets it from print().
    window_line = (
        f"Apnea window [{window['apnea_start_time']:.3f}, {window['apnea_end_time']:.3f}) s\n"
    )
    eit_line = (
        f"  EIT  → idx [{eit_idx['start']}, {eit_idx['end']}) len={eit_idx['len']} "
        f"(~{seconds['eit']:.3f}s)\n"
    )
    pap_line = (
        f"  PAP  → idx [{pap_idx['start']}, {pap_idx['end']}) len={pap_idx['len']} "
        f"(~{seconds['pap']:.3f}s)\n"
    )
    target_line = f"  Target duration: ~{seconds['target']:.3f}s"

    print(window_line + eit_line + pap_line + target_line)


In [3]:
if __name__ == "__main__":
    import os  # used only to build paths in this driver cell

    # Define base folder once (global for all paths).
    # Paths are assembled with os.path.join instead of hard-coded "\" separators,
    # so the cell resolves the same folders on Windows and on POSIX systems.
    FOLDER_BASE = os.path.join("data_displaid_tidal", "data_tilda_transformed")

    # List of patients to process
    patient_ids = ["TDL016", "TDL010", "TDL012", "TDL013", "TDL014", "TDL015"]

    for patient_id in patient_ids:
        print(f"\n=== Processing patient {patient_id} ===")

        report = cut_apnea_window_and_save(
            clinical_config_path=os.path.join(FOLDER_BASE, "clinical_config.json"),
            patient_id=patient_id,
            voltage_in_csv_path=os.path.join(FOLDER_BASE, "Data Raw Voltage", f"{patient_id}.csv"),
            voltage_out_csv_path=os.path.join(
                FOLDER_BASE, "Data Raw Voltage apnea", f"{patient_id}_voltage_apnea.csv"
            ),
            pap_in_csv_path=os.path.join(FOLDER_BASE, "Data pap", f"{patient_id}_transcribed_PAP.csv"),
            pap_out_csv_path=os.path.join(FOLDER_BASE, "Data pap apnea", f"{patient_id}_pap_apnea.csv"),
            eit_sr=50.355,
            pap_sr=1.0,  # set your actual PAP sample rate here if known
        )

        print_cut_summary(report)



=== Processing patient TDL016 ===
Apnea window [75.000, 255.000) s
  EIT  → idx [3777, 12841) len=9064 (~180.002s)
  PAP  → idx [75, 255) len=180 (~180.000s)
  Target duration: ~180.000s

=== Processing patient TDL010 ===
Apnea window [175.000, 360.000) s
  EIT  → idx [8812, 18128) len=9316 (~185.006s)
  PAP  → idx [175, 360) len=185 (~185.000s)
  Target duration: ~185.000s

=== Processing patient TDL012 ===
Apnea window [80.000, 260.000) s
  EIT  → idx [4028, 13092) len=9064 (~180.002s)
  PAP  → idx [80, 260) len=180 (~180.000s)
  Target duration: ~180.000s

=== Processing patient TDL013 ===
Apnea window [90.000, 275.000) s
  EIT  → idx [4532, 13848) len=9316 (~185.006s)
  PAP  → idx [90, 275) len=185 (~185.000s)
  Target duration: ~185.000s

=== Processing patient TDL014 ===
Apnea window [90.000, 270.000) s
  EIT  → idx [4532, 13596) len=9064 (~180.002s)
  PAP  → idx [90, 270) len=180 (~180.000s)
  Target duration: ~180.000s

=== Processing patient TDL015 ===
Apnea window [60.000, 2

In [5]:
import numpy as np
import pandas as pd

# ============================================================
# Upsample the apnea-window PAP signal onto the apnea-window voltage (EIT) grid
# (same number of rows and same sample rate as the voltage)
# - Robust to various PAP column names; falls back to first col
# - Handles empty / single-sample PAP gracefully
# - Optional CSV save with time column
# ============================================================
def _pick_pap_column(pap_df: pd.DataFrame) -> np.ndarray:
    """
    Return PAP series as 1D float64 array.
    Preference order by column name; else first column.
    """
    preferred = ("PAP (mmHg)", "pap_mmHg", "pap", "PAP")
    for name in preferred:
        if name in pap_df.columns:
            return pap_df[name].to_numpy(dtype=np.float64)
    # Fallback: first column
    return pap_df.iloc[:, 0].to_numpy(dtype=np.float64)

def upsample_pap_apnea_to_voltage_grid(
    voltage_apnea_csv_path: str,
    pap_apnea_csv_path: str,
    *,
    eit_sr: float = 50.355,
    pap_sr: float = 1.0,
    out_csv_path: Optional[str] = None,
    time0_s: float = 0.0,
) -> Dict[str, Any]:
    """
    Read apnea-trimmed voltage (headerless matrix) and PAP (with headers),
    then upsample PAP to the voltage timeline (same number of rows as voltage).

    Both time axes start at ``time0_s``; sample ``i`` sits at ``time0_s + i / rate``.
    PAP is linearly interpolated onto the voltage sample times with ``np.interp``,
    which holds the last PAP value for voltage samples later than the last PAP sample.

    Special cases:
        - no voltage rows  -> empty result
        - no PAP samples   -> zeros, one per voltage row
        - one PAP sample   -> that value repeated for every voltage row

    Args:
        voltage_apnea_csv_path: Headerless voltage CSV; only its row count is used.
        pap_apnea_csv_path: PAP CSV; the column is chosen by ``_pick_pap_column``.
        eit_sr: Voltage sample rate in Hz.
        pap_sr: PAP sample rate in Hz.
        out_csv_path: If given, a CSV with columns ``time_s`` and
            ``PAP_on_VoltageApnea (mmHg)`` is written there. Missing parent
            folders are created; a bare filename is written to the working directory.
        time0_s: Time assigned to the first sample of both streams.

    Returns:
        A dict with the upsampled float32 vector (``pap_on_voltage_apnea``),
        row counts, sample rates, the saved path (or ``None``) and the input paths.
    """
    import os  # only needed for the optional save

    # --- Load apnea-trimmed voltage & pap ---
    eit_df = pd.read_csv(voltage_apnea_csv_path, header=None)   # shape (T_eit, C)
    pap_df = pd.read_csv(pap_apnea_csv_path)                    # first row is the header

    T_eit = len(eit_df)
    pap_series = _pick_pap_column(pap_df)
    N_pap = pap_series.size

    # Voltage time axis; built lazily so the trivial branches below never touch
    # eit_sr unless a CSV is actually written.
    t_eit = None

    # --- Edge cases ---
    if T_eit == 0:
        result = np.zeros(0, dtype=np.float32)
    elif N_pap == 0:
        # No PAP samples: return zeros aligned to EIT length
        result = np.zeros(T_eit, dtype=np.float32)
    elif N_pap == 1:
        # Single PAP sample: constant fill
        result = np.full(T_eit, float(pap_series[0]), dtype=np.float32)
    else:
        # --- Build time bases (both start at the apnea window start) ---
        t_eit = time0_s + np.arange(T_eit, dtype=np.float64) / eit_sr
        t_pap = time0_s + np.arange(N_pap, dtype=np.float64) / pap_sr

        # --- Linear interpolation onto EIT grid ---
        result = np.interp(t_eit, t_pap, pap_series).astype(np.float32)

    # --- Optional save ---
    saved_path = None
    if out_csv_path:
        # os.makedirs("") raises FileNotFoundError, so only create a folder when
        # the path actually names one.
        out_dir = os.path.dirname(out_csv_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        if t_eit is None:
            t_eit = time0_s + np.arange(T_eit, dtype=np.float64) / eit_sr
        out_df = pd.DataFrame({
            "time_s": t_eit,
            "PAP_on_VoltageApnea (mmHg)": result
        })
        out_df.to_csv(out_csv_path, index=False)
        saved_path = out_csv_path

    return {
        "pap_on_voltage_apnea": result,   # np.ndarray, shape (T_eit,)
        "eit_rows": T_eit,
        "pap_rows": int(N_pap),
        "eit_sr": float(eit_sr),
        "pap_sr": float(pap_sr),
        "saved_to": saved_path,
        "inputs": {
            "voltage_apnea_csv_path": voltage_apnea_csv_path,
            "pap_apnea_csv_path": pap_apnea_csv_path,
        },
    }

def print_upscale_summary(summary: Dict[str, Any]) -> None:
    """
    Print a four-line summary of an upsample result.

    Reads ``eit_rows``, ``eit_sr``, ``pap_rows``, ``pap_sr`` and ``saved_to`` from
    ``summary``; a falsy ``saved_to`` is shown as "(not saved)".
    """
    lines = ["PAP apnea → Voltage apnea upsample"]
    lines.append(f"  EIT rows: {summary['eit_rows']} @ {summary['eit_sr']} Hz")
    lines.append(f"  PAP rows: {summary['pap_rows']} @ {summary['pap_sr']} Hz")

    destination = summary["saved_to"]
    if not destination:
        destination = "(not saved)"
    lines.append(f"  Saved to: {destination}")

    print("\n".join(lines))

# ------------------------------------------------------------
# Example (call manually; does NOT affect your existing __main__)
# ------------------------------------------------------------
def example_upscale_for_patient(patient_id: str = "TDL016"):
    """
    Upsample one patient's apnea-window PAP onto the voltage grid and print a summary.

    Input and output locations are derived from ``patient_id`` under the
    ``data_displaid_tidal/data_tilda_transformed`` folder. Paths are assembled
    with ``os.path.join`` so they resolve on both Windows and POSIX systems.
    Returns nothing; the result is saved to CSV and summarised on stdout.
    """
    import os  # used only to build paths

    base = os.path.join("data_displaid_tidal", "data_tilda_transformed")
    voltage_apnea_csv = os.path.join(base, "Data Raw Voltage apnea", f"{patient_id}_voltage_apnea.csv")
    pap_apnea_csv = os.path.join(base, "Data pap apnea", f"{patient_id}_pap_apnea.csv")
    # NOTE(review): this example writes to "Data pap EIT apnea", whereas the batch
    # cell in this notebook writes to "Data pap apnea upscaled" — confirm which
    # folder downstream steps expect.
    out_csv = os.path.join(base, "Data pap EIT apnea", f"{patient_id}_pap_on_voltage_apnea.csv")

    summary = upsample_pap_apnea_to_voltage_grid(
        voltage_apnea_csv_path=voltage_apnea_csv,
        pap_apnea_csv_path=pap_apnea_csv,
        eit_sr=50.355,
        pap_sr=1.0,               # set your true PAP Hz if known
        out_csv_path=out_csv,     # omit to skip saving
        time0_s=0.0,
    )
    print_upscale_summary(summary)


In [6]:
if __name__ == "__main__":
    import os  # used only to build paths in this driver cell

    # Define base folder once (global for all paths).
    # Paths are assembled with os.path.join instead of hard-coded "\" separators,
    # so the cell resolves the same folders on Windows and on POSIX systems.
    FOLDER_BASE = os.path.join("data_displaid_tidal", "data_tilda_transformed")

    # List of patients to process
    patient_ids = ["TDL016", "TDL010", "TDL012", "TDL013", "TDL014", "TDL015"]

    for patient_id in patient_ids:
        print(f"\n=== Upscaling PAP for patient {patient_id} ===")

        summary = upsample_pap_apnea_to_voltage_grid(
            voltage_apnea_csv_path=os.path.join(
                FOLDER_BASE, "Data Raw Voltage apnea", f"{patient_id}_voltage_apnea.csv"
            ),
            pap_apnea_csv_path=os.path.join(FOLDER_BASE, "Data pap apnea", f"{patient_id}_pap_apnea.csv"),
            eit_sr=50.355,
            pap_sr=1.0,
            out_csv_path=os.path.join(
                FOLDER_BASE, "Data pap apnea upscaled", f"{patient_id}_pap_on_voltage_apnea.csv"
            ),
        )

        print_upscale_summary(summary)



=== Upscaling PAP for patient TDL016 ===
PAP apnea → Voltage apnea upsample
  EIT rows: 9064 @ 50.355 Hz
  PAP rows: 180 @ 1.0 Hz
  Saved to: data_displaid_tidal\data_tilda_transformed\Data pap apnea upscaled\TDL016_pap_on_voltage_apnea.csv

=== Upscaling PAP for patient TDL010 ===
PAP apnea → Voltage apnea upsample
  EIT rows: 9316 @ 50.355 Hz
  PAP rows: 185 @ 1.0 Hz
  Saved to: data_displaid_tidal\data_tilda_transformed\Data pap apnea upscaled\TDL010_pap_on_voltage_apnea.csv

=== Upscaling PAP for patient TDL012 ===
PAP apnea → Voltage apnea upsample
  EIT rows: 9064 @ 50.355 Hz
  PAP rows: 180 @ 1.0 Hz
  Saved to: data_displaid_tidal\data_tilda_transformed\Data pap apnea upscaled\TDL012_pap_on_voltage_apnea.csv

=== Upscaling PAP for patient TDL013 ===
PAP apnea → Voltage apnea upsample
  EIT rows: 9316 @ 50.355 Hz
  PAP rows: 185 @ 1.0 Hz
  Saved to: data_displaid_tidal\data_tilda_transformed\Data pap apnea upscaled\TDL013_pap_on_voltage_apnea.csv

=== Upscaling PAP for patient T