# In [2]:
# Welch's t-test: Age vs Heart-attack Risk
# --- Core imports (stats unchanged) ---
import pandas as pd
import numpy as np
from scipy import stats

# rich is only needed for table rendering. If it is not importable, install it
# with the interpreter that is running this code and retry the import.
try:
    from rich.console import Console
    from rich.table import Table
    from rich import box
except ImportError:
    # Only a missing package should trigger an install; any other failure
    # raised while importing rich is left to propagate.
    import importlib
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", "rich"])
    # The package was created after the interpreter started, so the import
    # system's cached directory listings may not include it yet.
    importlib.invalidate_caches()
    from rich.console import Console
    from rich.table import Table
    from rich import box

# Shared console used by the render_* functions below; Jupyter output is
# forced rather than left to rich's environment detection.
console = Console(force_jupyter=True)

# --- Load ---
df = pd.read_csv("Heart_Attack_Cleaned.csv")

# Locate the age and target columns, ignoring case and surrounding whitespace
# in the header; the first matching header wins, None when nothing matches.
age_col, target_col = (
    next((name for name in df.columns if name.strip().lower() == wanted), None)
    for wanted in ("age", "target")
)
if None in (age_col, target_col):
    raise ValueError("Required columns not found: age, target")

# Coerce both fields to numbers (unparseable entries become NaN) and keep
# only the rows where both values are present.
for field in (age_col, target_col):
    df[field] = pd.to_numeric(df[field], errors="coerce")
df = df.dropna(subset=[age_col, target_col]).copy()

# Any non-zero target counts as risk-positive; zero is risk-negative.
is_positive = df[target_col] != 0
df["Risk group"] = np.where(is_positive, "Risk positive", "Risk negative")

# Ages of each group as plain NumPy arrays for the statistics below.
pos = df.loc[is_positive, age_col].to_numpy()
neg = df.loc[~is_positive, age_col].to_numpy()

# Table 1: Descriptive statistics
# One row per risk group; the aggregated columns are created directly under
# their display names, so no separate rename step is needed.
rounded_columns = [
    "Mean age (years)",
    "SD (years)",
    "Median (years)",
    "Min (years)",
    "Max (years)",
]
desc = (
    df.groupby("Risk group")[age_col]
      .agg(**{
          "n": "count",
          "Mean age (years)": "mean",
          "SD (years)": lambda s: s.std(ddof=1),  # sample SD
          "Median (years)": "median",
          "Min (years)": "min",
          "Max (years)": "max",
      })
      .reset_index()
)
# Force the summary columns to numeric and round them to two decimals.
desc = desc.assign(**{
    name: pd.to_numeric(desc[name], errors="coerce").round(2)
    for name in rounded_columns
})

# Welch's t-test (two-tailed)
n_pos, n_neg = len(pos), len(neg)
# The sample variance (ddof=1) and the Welch degrees of freedom both divide by
# n - 1, so each group needs at least two observations. Fail with a clear
# message instead of a bare ZeroDivisionError further down.
if n_pos < 2 or n_neg < 2:
    raise ValueError(
        "Welch's t-test needs at least two observations per risk group "
        f"(risk positive: {n_pos}, risk negative: {n_neg})"
    )

# Per-group sample variances and squared standard errors of the group means.
s1_sq, s2_sq = float(np.var(pos, ddof=1)), float(np.var(neg, ddof=1))
v1, v2 = s1_sq/n_pos, s2_sq/n_neg
# With no spread in either group the degrees of freedom and the effect size
# below would divide by zero.
if v1 + v2 == 0:
    raise ValueError(
        "Age has zero variance in both risk groups; Welch's t-test is undefined"
    )

t_stat, p_val = stats.ttest_ind(pos, neg, equal_var=False, alternative="two-sided")
t_stat, p_val = float(t_stat), float(p_val)

# Welch–Satterthwaite approximation of the degrees of freedom.
df_welch = (v1 + v2)**2 / ((v1**2)/(n_pos-1) + (v2**2)/(n_neg-1))
mean_pos, mean_neg = float(np.mean(pos)), float(np.mean(neg))
mean_diff = mean_pos - mean_neg
se_diff = float(np.sqrt(v1 + v2))
# 95% confidence interval for the mean difference using the Welch df.
tcrit = stats.t.ppf(0.975, df_welch)
ci_low, ci_high = mean_diff - tcrit*se_diff, mean_diff + tcrit*se_diff

# Minimal effect size
# Pooled SD across both groups, then the standardized mean difference scaled
# by the small-sample correction 1 - 3 / (4 * (n_pos + n_neg) - 9).
sp2 = ((n_pos-1)*s1_sq + (n_neg-1)*s2_sq) / (n_pos + n_neg - 2)
sp = float(np.sqrt(sp2))
hedges_g = (1 - (3/(4*(n_pos + n_neg) - 9))) * (mean_diff / sp)

# --- Helpers: safe formatting and minimal red/green accent ---
def fmt_p(p, min_show=1e-300):
    """Format a p-value in scientific notation without ever printing zero.

    Args:
        p: The p-value to format.
        min_show: Smallest value that is printed as a number. Anything below
            it, including an exact 0 produced by floating-point underflow,
            is reported as ``"< <min_show>"``.

    Returns:
        ``"< 1e-300"``-style text when ``p`` is below ``min_show``, otherwise
        ``p`` with two decimals in scientific notation (e.g. ``"1.23e-02"``).
        NaN compares false against the floor and is formatted as ``"nan"``.
    """
    if p < min_show:
        return f"< {min_show:.0e}"
    return f"{p:.2e}"

def rg_num(value, positive_is_good=True, fmt="{:.2f}", threshold=None):
    """
    Format a number with an optional green/red rich markup accent.

    - ``value`` that cannot be converted to float is returned as ``str(value)``.
    - With ``threshold`` set: green when the number is below it, red otherwise.
    - Without ``threshold``: the sign decides. Positive is green and negative
      is red when ``positive_is_good`` is true, and the other way round when
      it is false.
    - Anything else (zero, NaN) is returned as plain formatted text.
    """
    try:
        number = float(value)
    except Exception:
        return str(value)

    text = fmt.format(number)
    if threshold is not None:
        colour = "green" if number < threshold else "red"
    elif number > 0:
        colour = "green" if positive_is_good else "red"
    elif number < 0:
        colour = "red" if positive_is_good else "green"
    else:
        # Neither above nor below zero: no accent.
        return text
    return f"[{colour}]{text}[/]"

# --- Renderers ---
def render_table1(desc_df: pd.DataFrame):
    """Print Table 1 (descriptive statistics of age per risk group).

    ``desc_df`` must contain the columns "Risk group", "n" and the five
    summary columns listed below. Each row becomes one table row: the group
    label as text, ``n`` as an integer and the summaries with two decimals.
    The table is written to the module-level ``console``.
    """
    summary_headers = [
        "Mean age (years)",
        "SD (years)",
        "Median (years)",
        "Min (years)",
        "Max (years)",
    ]

    table = Table(
        title="Table 1. Descriptive statistics of age by risk group",
        box=box.SIMPLE_HEAVY,
        header_style="bold",        # plain bold headers, no colour
        row_styles=["none", "dim"]  # alternate normal/dim rows
    )
    table.add_column("Risk group", justify="left", no_wrap=True)
    for header in ["n", *summary_headers]:
        table.add_column(header, justify="right")

    wanted = ["Risk group", "n", *summary_headers]
    for _, record in desc_df[wanted].iterrows():
        cells = [str(record["Risk group"]), f"{int(record['n'])}"]
        cells.extend(f"{record[header]:.2f}" for header in summary_headers)
        table.add_row(*cells)
    console.print(table)

def render_table2(mean_diff, ci_low, ci_high, t_stat, df_welch, p_val, hedges_g, alpha=0.05):
    """Print Table 2 (Welch's t-test results) followed by the test decision.

    Args:
        mean_diff: Mean age difference, risk-positive minus risk-negative.
        ci_low: Lower bound of the confidence interval for ``mean_diff``.
        ci_high: Upper bound of the confidence interval for ``mean_diff``.
        t_stat: Welch t statistic.
        df_welch: Welch degrees of freedom.
        p_val: Two-tailed p-value.
        hedges_g: Hedges' g effect size.
        alpha: Significance level used to colour the p-value and to choose
            between "Reject H0" and "Fail to reject H0".

    Only the mean difference and the p-value get a colour accent; all other
    cells are plain text. Output goes to the module-level ``console``.
    """
    # rich is already a dependency of this file; imported here so the function
    # carries everything it needs.
    from rich.markup import escape

    t = Table(
        title="Table 2. Welch’s t-test results (Age: risk-positive − risk-negative)",
        box=box.SIMPLE_HEAVY,
        header_style="bold",        # black headers
        row_styles=["none", "dim"]  # zebra without color
    )
    t.add_column("Metric", justify="left", no_wrap=True)
    t.add_column("Value", justify="right")

    # Targeted red/green only for mean difference and p-value
    mean_diff_txt = rg_num(mean_diff, positive_is_good=True, fmt="{:.2f}")
    p_txt = fmt_p(p_val)
    p_txt = f"[green]{p_txt}[/]" if p_val < alpha else f"[red]{p_txt}[/]"

    # The interval is written in square brackets, which rich also uses for
    # markup. With finite bounds the text starts with a digit or minus sign
    # and is left alone, but "[nan, ...]" or "[inf, ...]" would be read as a
    # style tag instead of being printed, so escape the cell explicitly.
    ci_txt = escape(f"[{ci_low:.2f}, {ci_high:.2f}]")

    t.add_row("Mean difference (years)", mean_diff_txt)
    t.add_row("95% CI (years)", ci_txt)
    t.add_row("t (Welch)", f"{t_stat:.2f}")
    t.add_row("df (Welch)", f"{df_welch:.2f}")
    t.add_row("p-value (two-tailed)", p_txt)
    t.add_row("Hedges' g", f"{hedges_g:.2f}")
    console.print(t)

    # Emphasized conclusion:
    decision = "Reject H0" if p_val < alpha else "Fail to reject H0"
    console.print(f"[bold]Decision (alpha={alpha:.2f}): {decision}[/]")

# --- Render both tables ---
# Significance level for the decision line printed under Table 2.
alpha = 0.05
render_table1(desc_df=desc)
render_table2(
    mean_diff=mean_diff,
    ci_low=ci_low,
    ci_high=ci_high,
    t_stat=t_stat,
    df_welch=df_welch,
    p_val=p_val,
    hedges_g=hedges_g,
    alpha=alpha,
)
