# In [1]:
# Welch's t-test: Resting blood pressure (trestbps) vs Heart-attack risk — Rich tables (black-first, targeted color)

# --- Core imports (stats unchanged) ---
import pandas as pd
import numpy as np
from scipy import stats

# --- Pretty tables via Rich (auto-install if missing) ---
try:
    from rich.console import Console
    from rich.table import Table
    from rich import box
except Exception:
    import sys, subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "rich"])
    from rich.console import Console
    from rich.table import Table
    from rich import box

# Rich console created with force_jupyter=True so tables are rendered through the Jupyter output path
console = Console(force_jupyter=True)

# --- Read the dataset ---
df = pd.read_csv("Heart_Attack_Cleaned.csv")

# Map normalized (stripped, lower-cased) header -> actual header.
# Iterating in reverse lets the FIRST matching column win when several normalize to the same key.
columns_by_key = {name.strip().lower(): name for name in reversed(list(df.columns))}
bp_col = columns_by_key.get("trestbps")
target_col = columns_by_key.get("target")
if bp_col is None or target_col is None:
    raise ValueError("Required columns not found: trestbps, target")

# Coerce both fields to numbers (unparseable cells become NaN), then keep only complete rows
df = (
    df.assign(**{
        col: pd.to_numeric(df[col], errors="coerce")
        for col in (bp_col, target_col)
    })
    .dropna(subset=[bp_col, target_col])
    .copy()
)

# Group membership: any non-zero target counts as risk-positive, zero as risk-negative
is_positive = df[target_col] != 0
df["Risk group"] = np.where(is_positive, "Risk positive", "Risk negative")

# Per-group blood-pressure arrays fed to the Welch test
pos = df.loc[is_positive, bp_col].to_numpy()
neg = df.loc[~is_positive, bp_col].to_numpy()

# Table 1: per-group descriptive statistics of the blood-pressure column.
# The aggregations are named with their final display labels, so no rename step is needed.
desc = (
    df.groupby("Risk group")[bp_col]
      .agg(**{
          "n": "count",
          "Mean BP (mmHg)": "mean",
          "SD (mmHg)": lambda s: s.std(ddof=1),  # sample SD (n - 1 denominator)
          "Median (mmHg)": "median",
          "Min (mmHg)": "min",
          "Max (mmHg)": "max",
      })
      .reset_index()
)

# Round every mmHg statistic to two decimals; the count column "n" is left untouched
mmhg_cols = ["Mean BP (mmHg)", "SD (mmHg)", "Median (mmHg)", "Min (mmHg)", "Max (mmHg)"]
desc = desc.assign(**{
    col: pd.to_numeric(desc[col], errors="coerce").round(2)
    for col in mmhg_cols
})

# Welch’s t-test (two-tailed)
n_pos, n_neg = len(pos), len(neg)

# Every formula below divides by n or (n - 1) of each group; with fewer than two
# observations in a group the sample variance and the Welch degrees of freedom are
# undefined, so fail early with a descriptive error instead of a ZeroDivisionError.
if min(n_pos, n_neg) < 2:
    raise ValueError(
        "Welch's t-test needs at least 2 observations per group "
        f"(risk-positive: {n_pos}, risk-negative: {n_neg})"
    )

# equal_var=False selects Welch's unequal-variance form of the two-sample t-test
t_stat, p_val = stats.ttest_ind(pos, neg, equal_var=False, alternative="two-sided")
t_stat, p_val = float(t_stat), float(p_val)

# Sample variances (ddof=1) and each group's contribution to the variance of the mean difference
s1_sq, s2_sq = float(np.var(pos, ddof=1)), float(np.var(neg, ddof=1))
v1, v2 = s1_sq/n_pos, s2_sq/n_neg

# Welch–Satterthwaite approximation of the degrees of freedom
df_welch = (v1 + v2)**2 / ((v1**2)/(n_pos-1) + (v2**2)/(n_neg-1))

# Mean difference (risk-positive minus risk-negative) and its standard error
mean_pos, mean_neg = float(np.mean(pos)), float(np.mean(neg))
mean_diff = mean_pos - mean_neg
se_diff = float(np.sqrt(v1 + v2))

# Two-sided 95% confidence interval: 0.975 quantile of t with the Welch df
tcrit = stats.t.ppf(0.975, df_welch)
ci_low, ci_high = mean_diff - tcrit*se_diff, mean_diff + tcrit*se_diff

# Minimal effect sizes
# Pooled SD -> standardized mean difference, scaled by the small-sample
# correction factor 1 - 3/(4*(n1 + n2) - 9) to give Hedges' g
sp2 = ((n_pos-1)*s1_sq + (n_neg-1)*s2_sq) / (n_pos + n_neg - 2)
sp = float(np.sqrt(sp2))
hedges_g = (1 - (3/(4*(n_pos + n_neg) - 9))) * (mean_diff / sp)
# Point-biserial r derived from the t statistic, using the Welch df
r_pb = t_stat / np.sqrt(t_stat**2 + df_welch)

# --- Helper: red/green only where it matters; otherwise black ---
def rg_num(value, positive_is_good=True, fmt="{:.2f}", threshold=None):
    """
    Format a number as Rich markup, adding red/green only where it carries meaning.

    Parameters
    ----------
    value
        Anything ``float()`` accepts. Values that cannot be converted are
        returned unchanged as ``str(value)``.
    positive_is_good
        Used only when ``threshold`` is None. If truthy, positive values are
        green and negative values red; if falsy, the colors are swapped.
    fmt
        ``str.format`` template applied to the converted float.
    threshold
        When given, the sign is ignored: values strictly below the threshold
        are green and all other values red (e.g. a p-value against alpha).

    Returns
    -------
    str
        The formatted number wrapped in ``[green]…[/]`` or ``[red]…[/]``, or the
        bare formatted number for zero (without a threshold) and for NaN.
    """
    try:
        v = float(value)
    except Exception:
        # Best-effort formatter: show whatever was passed rather than fail the table
        return str(value)

    text = fmt.format(v)

    # NaN is the only float that differs from itself. Every comparison with it is
    # False, so without this guard an undefined value would fall into the "red"
    # branch of the threshold test and read as a real (non-significant) result.
    if v != v:
        return text

    if threshold is not None:
        color = "green" if v < threshold else "red"
    elif v == 0:
        # Zero is neither good nor bad: leave it uncolored
        return text
    else:
        color = "green" if (v > 0) == bool(positive_is_good) else "red"
    return f"[{color}]{text}[/]"

# --- Renderers (black-first styling; zebra striping; targeted red/green) ---
def render_table1(desc_df: pd.DataFrame):
    """
    Print Table 1 (descriptive statistics by risk group) to the module-level Rich console.

    ``desc_df`` must provide the columns "Risk group", "n" and the five mmHg
    statistics listed below. The group label is shown as text, "n" as an
    integer, and every mmHg statistic with two decimals.
    """
    numeric_headers = [
        "Mean BP (mmHg)",
        "SD (mmHg)",
        "Median (mmHg)",
        "Min (mmHg)",
        "Max (mmHg)",
    ]

    table = Table(
        title="Table 1. Descriptive statistics of resting blood pressure by risk group",
        box=box.SIMPLE_HEAVY,
        header_style="bold",        # bold headers, no color
        row_styles=["none", "dim"]  # alternate plain / dim rows for striping
    )

    # Label column on the left; every numeric column is right-aligned
    table.add_column("Risk group", justify="left", no_wrap=True)
    table.add_column("n", justify="right")
    for header in numeric_headers:
        table.add_column(header, justify="right")

    ordered = desc_df[["Risk group", "n", *numeric_headers]]
    for _, row in ordered.iterrows():
        cells = [str(row["Risk group"]), f"{int(row['n'])}"]
        cells.extend(f"{row[header]:.2f}" for header in numeric_headers)
        table.add_row(*cells)

    console.print(table)

def render_table2(mean_diff, ci_low, ci_high, t_stat, df_welch, p_val, hedges_g, r_pb, alpha=0.05):
    """
    Print Table 2 (Welch's t-test results) and the test decision to the module-level console.

    Parameters
    ----------
    mean_diff : mean difference; colored green when positive, red when negative.
    ci_low, ci_high : bounds of the confidence interval, shown as ``[low, high]``.
    t_stat, df_welch : test statistic and degrees of freedom.
    p_val : p-value; colored green when below ``alpha``, red otherwise.
    hedges_g, r_pb : effect sizes, shown uncolored.
    alpha : significance level used for the p-value coloring and for the decision line.
    """
    # Local import keeps this function self-contained; rich is already a dependency of the file.
    from rich.markup import escape

    t = Table(
        title="Table 2. Welch’s t-test results (Resting BP: risk-positive − risk-negative)",
        box=box.SIMPLE_HEAVY,
        header_style="bold",        # bold headers, no color
        row_styles=["none", "dim"]  # alternate plain / dim rows for striping
    )
    t.add_column("Metric", justify="left", no_wrap=True)
    t.add_column("Value", justify="right")

    # Targeted red/green only for mean difference and p-value
    mean_diff_txt = rg_num(mean_diff, positive_is_good=True, fmt="{:.2f}")
    p_txt = rg_num(p_val, fmt="{:.2e}", threshold=alpha)

    # Cell strings are parsed as Rich console markup. A bracketed interval whose
    # text starts with a letter (bounds formatted as "nan" or "inf") looks like a
    # style tag and would be swallowed, leaving the cell blank. escape() leaves
    # ordinary numeric intervals untouched and protects those cases.
    ci_txt = escape(f"[{ci_low:.2f}, {ci_high:.2f}]")

    t.add_row("Mean difference (mmHg)", mean_diff_txt)
    t.add_row("95% CI (mmHg)", ci_txt)
    t.add_row("t (Welch)", f"{t_stat:.2f}")
    t.add_row("df (Welch)", f"{df_welch:.2f}")
    t.add_row("p-value (two-tailed)", p_txt)
    t.add_row("Hedges' g", f"{hedges_g:.2f}")
    t.add_row("Point-biserial r", f"{r_pb:.3f}")
    console.print(t)

    # Emphasized conclusion: bold only (no color coding)
    decision = "Reject H0: resting BP differs by risk group" if p_val < alpha else "Fail to reject H0"
    console.print(f"[bold]Decision (alpha={alpha:.2f}): {decision}[/]")

# --- Output: descriptive table first, then the test summary and decision line ---
alpha = 0.05  # significance level for the p-value coloring and the decision
render_table1(desc)
render_table2(
    mean_diff=mean_diff,
    ci_low=ci_low,
    ci_high=ci_high,
    t_stat=t_stat,
    df_welch=df_welch,
    p_val=p_val,
    hedges_g=hedges_g,
    r_pb=r_pb,
    alpha=alpha,
)
