In [None]:
# Welch's t-test: Max heart rate vs Exercise-induced angina
import pandas as pd
import numpy as np
from scipy import stats

def find_column(frame, wanted):
    """Return the first column label of *frame* matching *wanted*, or None.

    Labels are compared after ``str()`` conversion, whitespace stripping and
    lower-casing, so *wanted* itself must already be stripped and lower-case.
    """
    return next(
        (label for label in frame.columns if str(label).strip().lower() == wanted),
        None,
    )


def welch_summary(group_a, group_b, alpha=0.05):
    """Compare two independent samples with Welch's unequal-variance t-test.

    Parameters
    ----------
    group_a, group_b : array-like of numbers
        Observations of the two groups. Every difference is reported as
        ``mean(group_a) - mean(group_b)``.
    alpha : float, default 0.05
        Two-sided significance level; the confidence interval for the mean
        difference has nominal coverage ``1 - alpha``.

    Returns
    -------
    dict
        ``n_a``, ``n_b``, ``mean_a``, ``mean_b``, ``var_a``, ``var_b``
        (sample variances, ddof=1), ``mean_diff``, ``se_diff``, ``t_stat``,
        ``p_val`` (two-sided), ``df_welch`` (Welch-Satterthwaite degrees of
        freedom), ``tcrit``, ``ci_low``, ``ci_high``, ``sp`` (pooled SD),
        ``hedges_g`` and ``r_pb``.

    Raises
    ------
    ValueError
        If *alpha* is not strictly between 0 and 1, if either group has fewer
        than two observations, or if both groups have zero variance. The
        last two cases would otherwise surface as a ZeroDivisionError in the
        degrees-of-freedom formula.
    """
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1 (got {alpha})")

    a = np.asarray(group_a, dtype=float)
    b = np.asarray(group_b, dtype=float)
    n_a, n_b = int(a.size), int(b.size)
    if n_a < 2 or n_b < 2:
        raise ValueError(
            "Welch's t-test needs at least 2 observations per group "
            f"(got {n_a} and {n_b})"
        )

    var_a = float(np.var(a, ddof=1))
    var_b = float(np.var(b, ddof=1))
    # Squared standard error contributed by each group.
    v_a, v_b = var_a / n_a, var_b / n_b
    if v_a + v_b == 0:
        raise ValueError(
            "Both groups have zero variance; the t statistic is undefined"
        )

    t_stat, p_val = stats.ttest_ind(a, b, equal_var=False, alternative="two-sided")
    t_stat, p_val = float(t_stat), float(p_val)

    # Welch-Satterthwaite approximation of the degrees of freedom.
    df_welch = (v_a + v_b) ** 2 / (v_a**2 / (n_a - 1) + v_b**2 / (n_b - 1))

    mean_a, mean_b = float(np.mean(a)), float(np.mean(b))
    mean_diff = mean_a - mean_b
    se_diff = float(np.sqrt(v_a + v_b))
    tcrit = float(stats.t.ppf(1 - alpha / 2, df_welch))
    half_width = tcrit * se_diff

    # Effect sizes. Hedges' g standardises by the pooled SD; the correction
    # factor 1 - 3/(4*(n_a + n_b) - 9) equals 1 - 3/(4*df - 1) with
    # df = n_a + n_b - 2.
    pooled_var = ((n_a - 1) * var_a + (n_b - 1) * var_b) / (n_a + n_b - 2)
    sp = float(np.sqrt(pooled_var))
    hedges_g = (1 - 3 / (4 * (n_a + n_b) - 9)) * (mean_diff / sp)
    # NOTE(review): this is the t-to-r conversion applied to the Welch t and
    # Welch df, not a correlation computed directly from the raw data.
    r_pb = float(t_stat / np.sqrt(t_stat**2 + df_welch))

    return {
        "n_a": n_a,
        "n_b": n_b,
        "mean_a": mean_a,
        "mean_b": mean_b,
        "var_a": var_a,
        "var_b": var_b,
        "mean_diff": mean_diff,
        "se_diff": se_diff,
        "t_stat": t_stat,
        "p_val": p_val,
        "df_welch": df_welch,
        "tcrit": tcrit,
        "ci_low": mean_diff - half_width,
        "ci_high": mean_diff + half_width,
        "sp": sp,
        "hedges_g": hedges_g,
        "r_pb": r_pb,
    }


# The guard is true when this cell runs in a notebook or as a script, so the
# names assigned below are still ordinary module-level globals there.
if __name__ == "__main__":
    # Significance level; drives the CI level, the decision and its message.
    alpha = 0.05

    # Load data
    df = pd.read_csv("Heart_Attack_Cleaned.csv")

    # Identify required columns (case- and whitespace-insensitive match)
    ang_col = find_column(df, "exercise angina")
    hr_col = find_column(df, "max heart rate")
    if ang_col is None or hr_col is None:
        raise ValueError("Required columns not found: exercise angina, max heart rate")

    # Prepare fields: non-numeric entries become NaN and those rows are dropped
    df[ang_col] = pd.to_numeric(df[ang_col], errors="coerce")
    df[hr_col] = pd.to_numeric(df[hr_col], errors="coerce")
    df = df.dropna(subset=[ang_col, hr_col]).copy()

    # Define groups
    # NOTE(review): every value other than 1 (not only 0) lands in the
    # "Without" group -- confirm the column is strictly coded 0/1.
    df["Group"] = np.where(
        df[ang_col] == 1,
        "With exercise-induced angina",
        "Without exercise-induced angina",
    )

    # Arrays for Welch
    with_ang = df.loc[df["Group"] == "With exercise-induced angina", hr_col].to_numpy()
    without_ang = df.loc[df["Group"] == "Without exercise-induced angina", hr_col].to_numpy()

    # Table 1: Descriptive statistics ("std" is the sample SD, ddof=1)
    desc = (
        df.groupby("Group")[hr_col]
        .agg(n="count", mean="mean", sd="std", median="median", min="min", max="max")
        .reset_index()
        .rename(columns={
            "mean": "Mean HR (bpm)",
            "sd": "SD (bpm)",
            "median": "Median (bpm)",
            "min": "Min (bpm)",
            "max": "Max (bpm)",
        })
        .round({"Mean HR (bpm)": 2, "SD (bpm)": 2})
    )

    # Welch's t-test (two-tailed), CI and effect sizes
    summary = welch_summary(with_ang, without_ang, alpha=alpha)
    n_with, n_without = summary["n_a"], summary["n_b"]
    mean_with, mean_without = summary["mean_a"], summary["mean_b"]
    mean_diff, se_diff = summary["mean_diff"], summary["se_diff"]
    t_stat, p_val = summary["t_stat"], summary["p_val"]
    df_welch, tcrit = summary["df_welch"], summary["tcrit"]
    ci_low, ci_high = summary["ci_low"], summary["ci_high"]
    hedges_g, r_pb = summary["hedges_g"], summary["r_pb"]

    # Print tables
    print("Table 1. Maximum heart rate by exercise-induced angina status\n")
    print(desc[["Group", "n", "Mean HR (bpm)", "SD (bpm)", "Median (bpm)", "Min (bpm)", "Max (bpm)"]]
          .to_string(index=False))

    print("\nTable 2. Welch’s t-test results (Max HR: with angina − without angina)\n")
    res = pd.DataFrame([{
        "Mean difference (bpm)": round(mean_diff, 2),
        # ":g" renders the coverage without trailing zeros, e.g. "95% CI (bpm)".
        f"{(1 - alpha) * 100:g}% CI (bpm)": f"[{ci_low:.2f}, {ci_high:.2f}]",
        "t (Welch)": round(t_stat, 2),
        "df (Welch)": round(df_welch, 2),
        "p-value (two-tailed)": f"{p_val:.2e}",
        "Hedges' g": round(hedges_g, 2),
        "Point-biserial r": round(r_pb, 3),
    }])
    print(res.to_string(index=False))

    # Simple decision line
    decision = "Reject H0: max HR differs by angina status" if p_val < alpha else "Fail to reject H0"
    print(f"\nDecision (alpha={alpha}): {decision}.")
