In [2]:
# Two-proportion test: Heart-attack risk (target != 0) by sex — clean console tables (no exports)
import pandas as pd
import numpy as np
from scipy import stats

# Load the cleaned dataset from the working directory
df = pd.read_csv("Heart_Attack_Cleaned.csv")

# Identify columns: match header names case-insensitively, ignoring surrounding
# whitespace, and keep the first match for each required name
_found = {
    wanted: [name for name in df.columns if name.strip().lower() == wanted]
    for wanted in ("sex", "target")
}
sex_col = _found["sex"][0] if _found["sex"] else None
target_col = _found["target"][0] if _found["target"] else None
if None in (sex_col, target_col):
    raise ValueError("Required columns not found: sex, target")

# Prepare: coerce both columns to numbers (unparseable entries become NaN),
# then drop every row where either value is missing
df = df.assign(**{
    sex_col: pd.to_numeric(df[sex_col], errors="coerce"),
    target_col: pd.to_numeric(df[target_col], errors="coerce"),
})
df = df.dropna(subset=[sex_col, target_col]).copy()

# Define: risk-positive := target != 0; risk-negative := target == 0
df["Risk positive"] = df[target_col].ne(0).astype(int)
# Code 1 is labelled "Male"; every other remaining value is labelled "Female"
df["Sex"] = np.where(df[sex_col].eq(1), "Male", "Female")

# Counts and rates by sex: group size and number of risk-positive rows,
# then the derived risk-negative count and the risk rate in percent
_risk_by_sex = df.groupby("Sex")["Risk positive"]
tab = _risk_by_sex.agg(n="count", risk_pos="sum").reset_index()
tab["risk_neg"] = tab["n"].sub(tab["risk_pos"])
tab["risk_rate"] = tab["risk_pos"].div(tab["n"]).mul(100)

# Print Table 1 (display copy only; `tab` keeps its raw column names and
# unrounded rates for the calculations that follow)
print("Table 1. Heart-attack risk by sex\n")
_table1_labels = {
    "risk_pos": "Risk-positive",
    "risk_neg": "Risk-negative",
    "risk_rate": "Risk rate (%)",
}
_table1 = tab.loc[:, ["Sex", "n", "risk_pos", "risk_neg", "risk_rate"]].rename(
    columns=_table1_labels
)
_table1["Risk rate (%)"] = _table1["Risk rate (%)"].round(2)
print(_table1.to_string(index=False))

# Extract male/female counts.
# Fail early with a clear message if either group is absent; otherwise the
# .iloc[0] lookups below would raise a bare IndexError.
missing_groups = sorted({"Male", "Female"} - set(tab["Sex"]))
if missing_groups:
    raise ValueError(
        "Cannot compare proportions; no rows for sex group(s): "
        + ", ".join(missing_groups)
    )
row_m = tab.loc[tab["Sex"] == "Male"].iloc[0]
row_f = tab.loc[tab["Sex"] == "Female"].iloc[0]
n_male, x_male = int(row_m["n"]), int(row_m["risk_pos"])
n_female, x_female = int(row_f["n"]), int(row_f["risk_pos"])

# Observed risk-positive proportions per group
p_male = x_male / n_male
p_female = x_female / n_female

# Two-proportion z-test (pooled SE for test)
p_pool = (x_male + x_female) / (n_male + n_female)
se_pool = np.sqrt(p_pool * (1 - p_pool) * (1 / n_male + 1 / n_female))
z_stat = (p_male - p_female) / se_pool
# Use the survival function instead of 1 - cdf: for large |z| the cdf rounds
# to 1.0 in double precision and the subtraction would collapse to exactly 0.
p_val_two = 2 * stats.norm.sf(abs(z_stat))

# 95% CI for difference (unpooled SE)
se_unpooled = np.sqrt(
    p_male * (1 - p_male) / n_male + p_female * (1 - p_female) / n_female
)
zcrit = stats.norm.ppf(0.975)
diff = p_male - p_female
ci_low, ci_high = diff - zcrit * se_unpooled, diff + zcrit * se_unpooled

# Risk ratio and odds ratio with 95% CIs (Wald on log scale).
# 2x2 cells: A/B = male risk-positive/negative, C/D = female risk-positive/negative.
A, B = x_male, n_male - x_male
C, D = x_female, n_female - x_female
if min(A, B, C, D) == 0:
    # A zero cell makes the ratios or their log-scale SEs undefined, so add a
    # 0.5 continuity correction to every cell. Only the ratios are affected;
    # the difference and z-test above use the raw counts.
    A, B, C, D = A + 0.5, B + 0.5, C + 0.5, D + 0.5

rr = (A / (A + B)) / (C / (C + D))
se_log_rr = np.sqrt(1 / A - 1 / (A + B) + 1 / C - 1 / (C + D))
rr_low = np.exp(np.log(rr) - zcrit * se_log_rr)
rr_high = np.exp(np.log(rr) + zcrit * se_log_rr)

odds_ratio = (A * D) / (B * C)
se_log_or = np.sqrt(1 / A + 1 / B + 1 / C + 1 / D)
or_low = np.exp(np.log(odds_ratio) - zcrit * se_log_or)
or_high = np.exp(np.log(odds_ratio) + zcrit * se_log_or)

# Print Table 2: one-row summary of the difference, its CI, the test result,
# and the ratio measures (each ratio shown as "estimate [low, high]")
res = pd.DataFrame([{
    "Risk difference (Male − Female)": round(diff, 4),
    "95% CI (difference)": f"[{ci_low:.4f}, {ci_high:.4f}]",
    "z statistic": round(z_stat, 2),
    # Very small p-values are reported as a bound instead of a specific number
    "p-value (two-tailed)": "< 1e-15" if p_val_two < 1e-15 else f"{p_val_two:.2e}",
    "Risk ratio": f"{rr:.2f} [{rr_low:.2f}, {rr_high:.2f}]",
    "Odds ratio": f"{odds_ratio:.2f} [{or_low:.2f}, {or_high:.2f}]",
}])

print("\nTable 2. Two-proportion z-test summary (Male − Female)\n")
print(res.to_string(index=False))

# Simple decision line
alpha = 0.05
decision = "Reject H0: different risk rates by sex" if p_val_two < alpha else "Fail to reject H0"
# Interpolate alpha so the printed level always matches the threshold used above
print(f"\nDecision (alpha={alpha}): {decision}.")


Table 1. Heart-attack risk by sex

   Sex    n  Risk-positive  Risk-negative  Risk rate (%)
Female  377            162            215          42.97
  Male 1114            755            359          67.77

Table 2. Two-proportion z-test summary (Male − Female)

 Risk difference (Male − Female) 95% CI (difference)  z statistic p-value (two-tailed)        Risk ratio        Odds ratio
                           0.248    [0.1910, 0.3050]         8.55              < 1e-15 1.58 [1.39, 1.78] 2.79 [2.20, 3.55]

Decision (alpha=0.05): Reject H0: different risk rates by sex.
