In [4]:
# ==========================================================
#  Conventional HRV Analysis (Mean RR, SDNN, RMSSD, PNN50)
#  for Pediatric HRV Dataset (RR interval time series)
#  Author: [Your name]
#  Repository: Paper_TDA_HRV
# ==========================================================
#!pip install scikit-posthocs

# ----------------------------------------------------------
# 0. Import libraries
# ----------------------------------------------------------
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm

# ----------------------------------------------------------
# 1. Load subject information and create age groups (WHO)
# ----------------------------------------------------------

def create_age_groups_who(df):
    """Label every subject with a developmental age group and drop the rest.

    The age bands (described as the WHO classification by the original
    author) are half-open intervals on the ``"Age (years)"`` column: each
    band includes its lower bound and excludes its upper bound.

    Parameters
    ----------
    df : pandas.DataFrame
        Subject table containing an ``"Age (years)"`` column.  An
        ``"Age_Group"`` column is added to this frame in place.

    Returns
    -------
    pandas.DataFrame
        The rows whose label is not ``"Excluded"``, with a fresh 0..n-1
        index.  Rows with a missing age are kept and labelled ``"Unknown"``.
    """
    # (exclusive upper bound in years, label), in ascending order.
    bands = (
        (1 / 12, "Neonates (0â€“1 mo)"),
        (6 / 12, "Early Infancy (1â€“5 mo)"),
        (1.0, "Late Infancy (6â€“11 mo)"),
        (3.0, "Toddlers (1â€“2 yr)"),
        (6.0, "Preschoolers (3â€“5 yr)"),
        (12.0, "School-age (6â€“11 yr)"),
        # The bound is 18, not 17: the label covers 12 through 17 completed
        # years, matching how every other band is bounded.  With 17.0,
        # 17-year-old subjects were silently discarded as "Excluded".
        (18.0, "Adolescents (12â€“17 yr)"),
    )

    def age_group(age):
        if pd.isna(age):
            return "Unknown"
        if age < 0:
            return "Excluded"
        for upper_bound, label in bands:
            if age < upper_bound:
                return label
        # At or above the last upper bound.
        return "Excluded"

    df["Age_Group"] = df["Age (years)"].apply(age_group)
    return df[df["Age_Group"] != "Excluded"].reset_index(drop=True)


# Read the subject table and attach the age-group labels in a single step
df_info = create_age_groups_who(
    pd.read_csv("/content/drive/MyDrive/Paper_TDA_HRV/patient-info.csv")
)

# Directory that contains the RR-interval text files
base_path = Path(
    "/content/drive/MyDrive/Paper_TDA_HRV/rr_data/rr-interval-time-series-from-healthy-subjects-1.0.0"
)

# ----------------------------------------------------------
# 2. HRV metrics computation
# ----------------------------------------------------------

def calcular_metricas_rr(file_path, N_sample=3000):
    """Compute conventional time-domain HRV metrics from one RR-interval file.

    The first column of the file is parsed as numbers (unparseable entries
    are dropped), values outside the open interval (300, 2000) are removed,
    and at most the first ``N_sample`` remaining intervals are analysed.
    The thresholds (300, 2000, 50) suggest the values are in milliseconds --
    TODO confirm against the dataset description.

    Recordings shorter than ``N_sample`` are analysed at their actual
    length.  They are deliberately not padded: repeating the last interval
    adds artificial beats, which pulls the mean toward that value and
    shrinks SDNN, RMSSD and pNN50.

    Parameters
    ----------
    file_path : str or pathlib.Path
        Text file whose first column holds the RR intervals, one per row,
        without a header.
    N_sample : int, default 3000
        Maximum number of valid intervals to use.

    Returns
    -------
    dict or None
        Keys ``mean_RR``, ``SDNN_RR`` (population standard deviation,
        ``ddof=0``), ``min_RR``, ``max_RR``, ``RMSSD_RR`` and ``PNN50_RR``
        (percentage).  ``None`` when the file is empty or fewer than two
        valid intervals remain, since successive differences are then
        undefined.
    """
    try:
        raw = pd.read_csv(
            file_path, header=None, usecols=[0], dtype=str
        ).iloc[:, 0]
    except pd.errors.EmptyDataError:
        # Nothing to parse: report "no metrics" instead of crashing the batch.
        return None

    serie = pd.to_numeric(raw, errors="coerce").dropna().values

    # Remove physiologically implausible intervals
    serie = serie[(serie > 300) & (serie < 2000)]

    # Truncate to N_sample beats; shorter series are kept as they are.
    serie = serie[:N_sample]

    # RMSSD and pNN50 need at least one successive difference.
    if len(serie) < 2:
        return None

    # --- HRV metrics ---
    diff_rr = np.diff(serie)

    return {
        "mean_RR": np.mean(serie),
        "SDNN_RR": np.std(serie),
        "min_RR": np.min(serie),
        "max_RR": np.max(serie),
        "RMSSD_RR": np.sqrt(np.mean(diff_rr ** 2)),
        "PNN50_RR": 100.0 * np.sum(np.abs(diff_rr) > 50) / len(diff_rr),
    }


# ----------------------------------------------------------
# 3. Process all subjects and store metrics
# ----------------------------------------------------------

# One metrics dictionary per subject whose RR file was found and analysed
results = []

for _, row in tqdm(df_info.iterrows(), total=len(df_info), desc="Processing subjects"):
    # RR files are named after the zero-padded, three-digit subject number
    file_path = base_path / f"{int(row['File']):03d}.txt"

    if file_path.exists():
        metrics = calcular_metricas_rr(file_path)
        if metrics is None:
            continue

        # Attach the subject identifiers after the metric columns
        metrics.update({
            "File": int(row["File"]),
            "Age (years)": row["Age (years)"],
            "Age_Group": row["Age_Group"],
        })
        results.append(metrics)

df_metrics = pd.DataFrame(results)

# ----------------------------------------------------------
# 4. Compute summary statistics per group
# ----------------------------------------------------------

# Median and mean of each conventional metric within every age group
hrv_columns = ["mean_RR", "SDNN_RR", "RMSSD_RR", "PNN50_RR"]
agg_spec = {column: ["median", "mean"] for column in hrv_columns}

summary = df_metrics.groupby("Age_Group").agg(agg_spec).round(2)

# Flatten the two-level header (metric, statistic) into "metric_statistic"
summary.columns = [f"{metric}_{stat}" for metric, stat in summary.columns]
summary = summary.reset_index()

print("âœ… HRV summary by age group:")
print(summary)

# ----------------------------------------------------------
# 5. (Optional) Non-parametric statistical testing
# ----------------------------------------------------------
from scipy.stats import kruskal
import scikit_posthocs as sp

print("\nðŸ“Š Kruskalâ€“Wallis tests:")

# Row mask of every age group, in order of first appearance; built once
# because the masks do not depend on the metric being tested.
group_masks = {
    label: df_metrics["Age_Group"] == label
    for label in df_metrics["Age_Group"].unique()
}

for metric in ["mean_RR", "SDNN_RR", "RMSSD_RR", "PNN50_RR"]:
    groups = [df_metrics.loc[mask, metric] for mask in group_masks.values()]
    H, p = kruskal(*groups)
    print(f"{metric}: H={H:.3f}, p={p:.5f}")

# Example post-hoc Dunn test for SDNN (Bonferroni-adjusted p-values)
dunn_sdnn = sp.posthoc_dunn(
    df_metrics,
    val_col="SDNN_RR",
    group_col="Age_Group",
    p_adjust="bonferroni",
)

# ----------------------------------------------------------
# 6. Export results
# ----------------------------------------------------------

# (table, output file, write the index?) -- only the Dunn matrix keeps its
# index, because its row labels are part of the result.
exports = (
    (df_metrics, "Conventional_HRV_Metrics_by_Subject.csv", False),
    (summary, "Conventional_HRV_Summary_by_Group.csv", False),
    (dunn_sdnn, "PostHoc_Dunn_SDNN.csv", True),
)
for table, filename, keep_index in exports:
    table.to_csv(filename, index=keep_index)

print("\nâœ… Files saved successfully.")


Processing subjects: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 135/135 [00:11<00:00, 12.00it/s]

âœ… HRV summary by age group:
                Age_Group  mean_RR_median  mean_RR_mean  SDNN_RR_median  \
0  Adolescents (12â€“17 yr)          674.29        674.35           81.43   
1  Early Infancy (1â€“5 mo)          402.46        406.43           40.88   
2  Late Infancy (6â€“11 mo)          430.78        431.68           36.74   
3       Neonates (0â€“1 mo)          404.01        412.50           62.94   
4   Preschoolers (3â€“5 yr)          507.66        512.53           55.58   
5    School-age (6â€“11 yr)          608.55        621.81           70.85   
6       Toddlers (1â€“2 yr)          473.88        470.00           49.36   

   SDNN_RR_mean  RMSSD_RR_median  RMSSD_RR_mean  PNN50_RR_median  \
0         86.33            39.23          46.11            14.14   
1         40.76            26.64          34.43             1.23   
2         43.46            32.42          42.41             1.40   
3         58.69            67.88          63.21             4.18   
4         51.40


