# In [1]:
# Welch's t-test: maximum heart rate vs. exercise-induced angina

# --- Core imports (stats unchanged) ---
import pandas as pd
import numpy as np
from scipy import stats

# --- Pretty tables via Rich (auto-install if missing) ---
try:
    from rich.console import Console
    from rich.table import Table
    from rich import box
except ImportError:
    # Rich is not importable: install it into the interpreter that is running
    # this code (sys.executable) and retry the imports. Only ImportError
    # triggers the install, so unrelated failures surface instead of being
    # answered with a pip call.
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", "rich"])
    from rich.console import Console
    from rich.table import Table
    from rich import box

# Force Rich to use the Jupyter renderer so colors and borders show in JupyterLab
console = Console(force_jupyter=True)

# --- Load data ---
df = pd.read_csv("Heart_Attack_Cleaned.csv")


def _find_column(columns, wanted):
    """Return the first header equal to *wanted* once stripped and lower-cased, else None."""
    for header in columns:
        if header.strip().lower() == wanted:
            return header
    return None


# Locate the two required columns regardless of header case or padding
ang_col = _find_column(df.columns, "exercise angina")
hr_col = _find_column(df.columns, "max heart rate")
if any(found is None for found in (ang_col, hr_col)):
    raise ValueError("Required columns not found: exercise angina, max heart rate")

# Coerce both fields to numbers (unparseable entries become NaN), then keep
# only the rows where both values are present
for _field in (ang_col, hr_col):
    df[_field] = pd.to_numeric(df[_field], errors="coerce")
df = df.dropna(subset=[ang_col, hr_col]).copy()

# Rows whose angina value equals 1 get the "With" label; all others get "Without"
_WITH_LABEL = "With exercise-induced angina"
_WITHOUT_LABEL = "Without exercise-induced angina"
df["Group"] = np.where(df[ang_col] == 1, _WITH_LABEL, _WITHOUT_LABEL)

# Heart-rate samples of each group as NumPy arrays (inputs to the Welch test)
_group = df["Group"]
with_ang = df.loc[_group == _WITH_LABEL, hr_col].to_numpy()
without_ang = df.loc[_group == _WITHOUT_LABEL, hr_col].to_numpy()

# Table 1: Descriptive statistics
# Output column label -> aggregation applied to the heart-rate values of each group
_summary = {
    "n": "count",
    "Mean HR (bpm)": "mean",
    "SD (bpm)": lambda values: values.std(ddof=1),  # sample standard deviation
    "Median (bpm)": "median",
    "Min (bpm)": "min",
    "Max (bpm)": "max",
}
desc = df.groupby("Group")[hr_col].agg(**_summary).reset_index()

# Round every statistic except the count to two decimals
for col in list(_summary)[1:]:
    desc[col] = pd.to_numeric(desc[col], errors="coerce").round(2)

# Welch's t-test (two-tailed)
n_with, n_without = len(with_ang), len(without_ang)

# Every formula below divides by n or by n - 1 of each group, so a group with
# fewer than two observations would otherwise fail with an opaque
# ZeroDivisionError; report the actual problem instead.
if min(n_with, n_without) < 2:
    raise ValueError(
        "Welch's t-test needs at least 2 observations per group "
        f"(with angina: {n_with}, without angina: {n_without})"
    )

t_stat, p_val = stats.ttest_ind(with_ang, without_ang, equal_var=False, alternative="two-sided")
t_stat, p_val = float(t_stat), float(p_val)

# Sample variances (ddof=1) and each group's variance divided by its size
s1_sq, s2_sq = float(np.var(with_ang, ddof=1)), float(np.var(without_ang, ddof=1))
v1, v2 = s1_sq/n_with, s2_sq/n_without

# Welch-Satterthwaite degrees of freedom
df_welch = (v1 + v2)**2 / ((v1**2)/(n_with-1) + (v2**2)/(n_without-1))

# Mean difference (with - without), its standard error, and the 95% CI built
# from the two-sided t critical value at df_welch
mean_with, mean_without = float(np.mean(with_ang)), float(np.mean(without_ang))
mean_diff = mean_with - mean_without
se_diff = float(np.sqrt(v1 + v2))
tcrit = stats.t.ppf(0.975, df_welch)
ci_low, ci_high = mean_diff - tcrit*se_diff, mean_diff + tcrit*se_diff

# Minimal effect sizes
# Pooled SD -> standardized mean difference, scaled by the small-sample
# correction 1 - 3/(4N - 9) with N = n_with + n_without (Hedges' g)
sp2 = ((n_with-1)*s1_sq + (n_without-1)*s2_sq) / (n_with + n_without - 2)
sp = float(np.sqrt(sp2))
hedges_g = (1 - (3/(4*(n_with + n_without) - 9))) * (mean_diff / sp)
# Correlation-type effect size derived from t and the Welch degrees of freedom
r_pb = t_stat / np.sqrt(t_stat**2 + df_welch)

# --- Helpers: safe formatting and minimal red/green accent ---
def fmt_p(p, min_show=1e-300):
    """Format a p-value in scientific notation without ever printing zero.

    A value that is zero or negative (for instance after floating-point
    underflow) is rendered as the upper bound ``< <min_show>``; any other
    value is shown with two decimals in scientific notation.
    """
    underflowed = p <= 0
    if underflowed:
        return f"< {min_show:.0e}"
    return f"{p:.2e}"

def rg_num(value, positive_is_good=True, fmt="{:.2f}", threshold=None):
    """Format a number, wrapping it in Rich green/red markup where applicable.

    Args:
        value: Anything ``float()`` accepts; other inputs are returned as
            ``str(value)`` without markup.
        positive_is_good: Used only when ``threshold`` is None. If true,
            positive numbers are green and negative ones red; if false, the
            colours are swapped.
        fmt: ``str.format`` template applied to the number.
        threshold: When given, numbers strictly below it are green and all
            others red, regardless of ``positive_is_good``.

    Returns:
        The formatted number, with colour markup unless no rule applies
        (e.g. zero when no threshold is given).
    """
    try:
        number = float(value)
    except Exception:
        return str(value)

    text = fmt.format(number)
    if threshold is not None:
        colour = "green" if number < threshold else "red"
    elif number > 0:
        colour = "green" if positive_is_good else "red"
    elif number < 0:
        colour = "red" if positive_is_good else "green"
    else:
        # Zero (or NaN): neither good nor bad, so leave it uncoloured
        return text
    return f"[{colour}]{text}[/]"

# --- Renderers (black-first styling; zebra striping; targeted red/green) ---
def render_table1(desc_df: pd.DataFrame):
    """Print the descriptive-statistics table, one row per group, via Rich.

    Args:
        desc_df: Frame providing the columns "Group", "n", "Mean HR (bpm)",
            "SD (bpm)", "Median (bpm)", "Min (bpm)" and "Max (bpm)".
    """
    stat_headers = ["Mean HR (bpm)", "SD (bpm)", "Median (bpm)", "Min (bpm)", "Max (bpm)"]
    ordered = ["Group", "n", *stat_headers]

    table = Table(
        title="Table 1. Maximum heart rate by exercise-induced angina status",
        box=box.SIMPLE_HEAVY,
        header_style="bold",         # plain bold headers, no colour
        row_styles=["none", "dim"],  # zebra striping without colour
    )
    # The label column is left-aligned and never wrapped; numbers go right
    table.add_column("Group", justify="left", no_wrap=True)
    for header in ordered[1:]:
        table.add_column(header, justify="right")

    for _, row in desc_df[ordered].iterrows():
        cells = [str(row["Group"]), f"{int(row['n'])}"]
        cells.extend(f"{row[header]:.2f}" for header in stat_headers)
        table.add_row(*cells)
    console.print(table)

def render_table2(mean_diff, ci_low, ci_high, t_stat, df_welch, p_val, hedges_g, r_pb, alpha=0.05):
    """Print the Welch's t-test result table and the test decision via Rich.

    Args:
        mean_diff: Mean difference in bpm; shown green when positive and red
            when negative.
        ci_low: Lower bound of the 95% confidence interval.
        ci_high: Upper bound of the 95% confidence interval.
        t_stat: Welch t statistic.
        df_welch: Welch degrees of freedom.
        p_val: Two-tailed p-value; shown green when below ``alpha``, red
            otherwise.
        hedges_g: Hedges' g effect size.
        r_pb: Point-biserial r effect size.
        alpha: Significance level used for the p-value colour and for the
            decision line.
    """
    # Local import keeps this function self-contained; Rich is already a
    # dependency of this file.
    from rich.markup import escape

    t = Table(
        title="Table 2. Welch’s t-test results (Max HR: with angina − without angina)",
        box=box.SIMPLE_HEAVY,
        header_style="bold",        # black headers
        row_styles=["none", "dim"]  # zebra without color
    )
    t.add_column("Metric", justify="left", no_wrap=True)
    t.add_column("Value", justify="right")

    # Targeted red/green only for mean difference and p-value
    mean_diff_txt = rg_num(mean_diff, positive_is_good=True, fmt="{:.2f}")
    p_txt = fmt_p(p_val)
    p_txt = f"[green]{p_txt}[/]" if p_val < alpha else f"[red]{p_txt}[/]"

    # Rich reads "[...]" as a markup tag when the bracket content starts with
    # a letter (e.g. "[nan, nan]" or "[inf, inf]"), which would silently drop
    # the interval from the cell. Escaping leaves ordinary numeric intervals
    # untouched and keeps the degenerate ones visible.
    ci_txt = escape(f"[{ci_low:.2f}, {ci_high:.2f}]")

    t.add_row("Mean difference (bpm)", mean_diff_txt)
    t.add_row("95% CI (bpm)", ci_txt)
    t.add_row("t (Welch)", f"{t_stat:.2f}")
    t.add_row("df (Welch)", f"{df_welch:.2f}")
    t.add_row("p-value (two-tailed)", p_txt)
    t.add_row("Hedges' g", f"{hedges_g:.2f}")
    t.add_row("Point-biserial r", f"{r_pb:.3f}")
    console.print(t)

    # Emphasized conclusion: bold only (no color coding)
    decision = "Reject H0: max HR differs by angina status" if p_val < alpha else "Fail to reject H0"
    console.print(f"[bold]Decision (alpha={alpha:.2f}): {decision}[/]")

# --- Render both tables (presentation only; calculations above unchanged) ---
# Significance level used for the p-value colouring and the decision line
alpha = 0.05

render_table1(desc)
render_table2(
    mean_diff=mean_diff,
    ci_low=ci_low,
    ci_high=ci_high,
    t_stat=t_stat,
    df_welch=df_welch,
    p_val=p_val,
    hedges_g=hedges_g,
    r_pb=r_pb,
    alpha=alpha,
)
