# In [None]:
# Welch's t-test: Cholesterol vs Heart-attack occurrence 

# --- Core imports (stats unchanged) ---
import pandas as pd
import numpy as np
from scipy import stats

# Rich is a third-party package used only to render the result tables.
# If it cannot be imported, install it with pip and retry the imports.
try:
    from rich.console import Console
    from rich.table import Table
    from rich import box
except ImportError:
    # Only a missing/unimportable package should trigger an install; any
    # other failure is a genuine error and must not be hidden behind pip.
    import subprocess
    import sys

    # Run pip through the interpreter that is executing this code.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "rich"])
    from rich.console import Console
    from rich.table import Table
    from rich import box

# Shared console used by the renderers below; Jupyter rendering is enabled
# explicitly instead of relying on auto-detection.
console = Console(force_jupyter=True)

# Load the cleaned dataset (path is relative to the working directory)
df = pd.read_csv("Heart_Attack_Cleaned.csv")


def _find_column(frame, wanted):
    """Return the first column label of *frame* matching *wanted*, or None.

    Labels are compared after stripping surrounding whitespace and
    lower-casing, so *wanted* must already be lower-case.
    """
    for label in frame.columns:
        if label.strip().lower() == wanted:
            return label
    return None


# Locate the two columns the analysis depends on
chol_col = _find_column(df, "cholesterol")
target_col = _find_column(df, "target")
if any(label is None for label in (chol_col, target_col)):
    raise ValueError("Required columns not found: cholesterol, target")

# Coerce both fields to numeric (unparseable entries become NaN), then keep
# only the rows where both values are present
df = (
    df.assign(**{
        label: pd.to_numeric(df[label], errors="coerce")
        for label in (chol_col, target_col)
    })
    .dropna(subset=[chol_col, target_col])
    .copy()
)

# Group labels: target == 0 -> "Risk negative"; any other value -> "Risk positive"
df["Risk group"] = np.where(df[target_col].eq(0), "Risk negative", "Risk positive")

# Per-group cholesterol values as NumPy arrays (inputs to the Welch test)
pos = df.loc[df["Risk group"].eq("Risk positive"), chol_col].to_numpy()
neg = df.loc[df["Risk group"].eq("Risk negative"), chol_col].to_numpy()

# Table 1: Descriptive statistics
def _sample_sd(values):
    """Return the sample standard deviation (ddof=1) of one group's values."""
    return values.std(ddof=1)


# Named aggregation with the display headers used directly as output names
desc = (
    df.groupby("Risk group")[chol_col]
    .agg(**{
        "n": "count",
        "Mean Chol (mg/dL)": "mean",
        "SD (mg/dL)": _sample_sd,
        "Median (mg/dL)": "median",
        "Min (mg/dL)": "min",
        "Max (mg/dL)": "max",
    })
    .reset_index()
)

# Round for display only
desc = desc.assign(**{
    header: pd.to_numeric(desc[header], errors="coerce").round(2)
    for header in (
        "Mean Chol (mg/dL)",
        "SD (mg/dL)",
        "Median (mg/dL)",
        "Min (mg/dL)",
        "Max (mg/dL)",
    )
})

# Welch’s t-test (two-tailed)
n_pos, n_neg = len(pos), len(neg)
# Sample variances (ddof=1) and the Welch degrees of freedom divide by
# (n - 1); fail early with a clear message instead of a ZeroDivisionError.
if min(n_pos, n_neg) < 2:
    raise ValueError(
        "Welch's t-test needs at least 2 observations per group "
        f"(risk-positive: {n_pos}, risk-negative: {n_neg})"
    )

t_stat, p_val = stats.ttest_ind(pos, neg, equal_var=False, alternative="two-sided")
t_stat, p_val = float(t_stat), float(p_val)

# Per-group sample variances and their contributions to the squared
# standard error of the mean difference.
s1_sq, s2_sq = float(np.var(pos, ddof=1)), float(np.var(neg, ddof=1))
v1, v2 = s1_sq/n_pos, s2_sq/n_neg
if v1 + v2 == 0:
    # Both groups are constant: the standard error is 0 and the
    # Welch-Satterthwaite df below would be 0/0.
    raise ValueError(
        "Cholesterol has zero variance in both groups; Welch's t-test is undefined"
    )

# Welch-Satterthwaite degrees of freedom
df_welch = (v1 + v2)**2 / ((v1**2)/(n_pos-1) + (v2**2)/(n_neg-1))
mean_pos, mean_neg = float(np.mean(pos)), float(np.mean(neg))
mean_diff = mean_pos - mean_neg  # risk-positive minus risk-negative
se_diff = float(np.sqrt(v1 + v2))
# 0.975 quantile of t -> two-sided 95% confidence interval for the difference
tcrit = stats.t.ppf(0.975, df_welch)
ci_low, ci_high = mean_diff - tcrit*se_diff, mean_diff + tcrit*se_diff

# Minimal effect sizes
# Pooled variance / SD across both groups (used as the standardiser for g)
sp2 = ((n_pos-1)*s1_sq + (n_neg-1)*s2_sq) / (n_pos + n_neg - 2)
sp = float(np.sqrt(sp2))
# Standardised mean difference times the small-sample factor 1 - 3/(4N - 9)
hedges_g = (1 - (3/(4*(n_pos + n_neg) - 9))) * (mean_diff / sp)
# r = t / sqrt(t^2 + df), evaluated with the Welch t statistic and Welch df
r_pb = t_stat / np.sqrt(t_stat**2 + df_welch)

# --- Helper: red/green only where it matters; otherwise black ---
def rg_num(value, positive_is_good=True, fmt="{:.2f}", *, alpha=0.05):
    """Format a number as a string, wrapped in Rich colour markup where useful.

    Args:
        value: Anything ``float()`` accepts. Values that cannot be converted
            are returned unchanged as ``str(value)``.
        positive_is_good: Selects the colouring rule.
            ``True``  -> sign-based: green if ``v > 0``, red if ``v < 0``,
            uncoloured if ``v == 0``.
            ``False`` -> threshold-based (p-value style, smaller is better):
            green if ``v < alpha``, red otherwise.
        fmt: ``str.format`` template applied to the float value.
        alpha: Threshold used only when ``positive_is_good`` is ``False``.

    Returns:
        The formatted number, either plain or wrapped as ``[green]...[/]`` /
        ``[red]...[/]``. NaN is never coloured, in either mode.
    """
    try:
        v = float(value)
    except (TypeError, ValueError, OverflowError):
        # Not numeric: pass it through as text.
        return str(value)

    text = fmt.format(v)

    # NaN compares unequal to itself; it is neither good nor bad, so it must
    # not be coloured (in particular, not red like a non-significant p-value).
    if v != v:
        return text

    if positive_is_good:
        if v > 0:
            return f"[green]{text}[/]"
        if v < 0:
            return f"[red]{text}[/]"
        return text  # exactly zero stays uncoloured

    # Threshold mode, e.g. a p-value where smaller is better.
    colour = "green" if v < alpha else "red"
    return f"[{colour}]{text}[/]"

# --- Renderers ---
def render_table1(desc_df: pd.DataFrame):
    """Print Table 1 (descriptive cholesterol statistics per risk group).

    Args:
        desc_df: One row per risk group with the columns "Risk group", "n",
            "Mean Chol (mg/dL)", "SD (mg/dL)", "Median (mg/dL)",
            "Min (mg/dL)" and "Max (mg/dL)".

    The table is written to the module-level ``console``; nothing is returned.
    """
    # Columns shown with two decimals, in display order
    numeric_headers = [
        "Mean Chol (mg/dL)",
        "SD (mg/dL)",
        "Median (mg/dL)",
        "Min (mg/dL)",
        "Max (mg/dL)",
    ]

    # Bold headers and a dim zebra stripe only; no coloured text in this table
    table = Table(
        title="Table 1. Descriptive statistics of cholesterol by risk group",
        box=box.SIMPLE_HEAVY,
        header_style="bold",
        row_styles=["none", "dim"],
    )
    table.add_column("Risk group", justify="left", no_wrap=True)
    for header in ["n", *numeric_headers]:
        table.add_column(header, justify="right")

    ordered = desc_df[["Risk group", "n", *numeric_headers]]
    for _, record in ordered.iterrows():
        cells = [str(record["Risk group"]), f"{int(record['n'])}"]
        cells.extend(f"{record[header]:.2f}" for header in numeric_headers)
        table.add_row(*cells)

    console.print(table)

def render_table2(mean_diff, ci_low, ci_high, t_stat, df_welch, p_val, hedges_g, r_pb, alpha=0.05):
    """Print Table 2 (Welch's t-test results) followed by the test decision.

    Args:
        mean_diff: Mean difference, risk-positive minus risk-negative (mg/dL).
        ci_low, ci_high: Bounds of the 95% confidence interval for mean_diff.
        t_stat: Welch t statistic.
        df_welch: Welch degrees of freedom.
        p_val: Two-tailed p-value.
        hedges_g: Hedges' g effect size.
        r_pb: Point-biserial r effect size.
        alpha: Significance level used to colour the p-value and to word the
            decision line.

    Output goes to the module-level ``console``; nothing is returned.
    """
    # Local import: `escape` is only needed here and rich is already a
    # dependency of this file.
    from rich.markup import escape

    t = Table(
        title="Table 2. Welch’s t-test results (Cholesterol: risk-positive − risk-negative)",
        box=box.SIMPLE_HEAVY,
        header_style="bold",        # bold headers, no colour
        row_styles=["none", "dim"]  # zebra without color
    )
    t.add_column("Metric", style="", justify="left", no_wrap=True)
    t.add_column("Value", justify="right")

    # Only color values where helpful
    mean_diff_txt = rg_num(mean_diff, positive_is_good=True, fmt="{:.2f}")
    p_txt = f"{p_val:.2e}"
    # Color p-value green if < alpha else red
    p_txt = f"[green]{p_txt}[/]" if p_val < alpha else f"[red]{p_txt}[/]"

    # Optional: sign-driven color for g and r
    g_txt = rg_num(hedges_g, positive_is_good=True, fmt="{:.2f}")
    r_txt = rg_num(r_pb, positive_is_good=True, fmt="{:.3f}")

    # The interval is literal text in square brackets. Rich parses "[...]" as
    # a markup tag when it starts with a letter, so "[nan, nan]" or
    # "[inf, ...]" would be swallowed and the cell rendered empty; escaping
    # keeps the brackets literal and leaves numeric intervals unchanged.
    ci_txt = escape(f"[{ci_low:.2f}, {ci_high:.2f}]")

    t.add_row("Mean difference (mg/dL)", mean_diff_txt)
    t.add_row("95% CI (mg/dL)", ci_txt)
    t.add_row("t (Welch)", f"{t_stat:.2f}")
    t.add_row("df (Welch)", f"{df_welch:.2f}")
    t.add_row("p-value (two-tailed)", p_txt)
    t.add_row("Hedges' g", g_txt)
    t.add_row("Point-biserial r", r_txt)
    console.print(t)

    # Emphasized conclusion: bold only (no color coding)
    decision = "Reject H0: cholesterol differs by risk group" if p_val < alpha else "Fail to reject H0"
    console.print(f"[bold]Decision (alpha={alpha:.2f}): {decision}[/]")

# --- Render both tables ---
# Table 1: per-group descriptive statistics
render_table1(desc_df=desc)
# Table 2: Welch test results and decision at the 5% significance level
render_table2(
    mean_diff=mean_diff,
    ci_low=ci_low,
    ci_high=ci_high,
    t_stat=t_stat,
    df_welch=df_welch,
    p_val=p_val,
    hedges_g=hedges_g,
    r_pb=r_pb,
    alpha=0.05,
)
