In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from petTOAD_load import RES_DIR
from scipy import stats
from scipy.stats import mannwhitneyu

# Directory that receives the generated tables.
# `parents=True` also creates RES_DIR if it does not exist yet, and
# `exist_ok=True` makes the cell safe to re-run without a separate
# check-then-create step (which is racy and fails on a missing parent).
TBL_DIR = RES_DIR / "Tables"
TBL_DIR.mkdir(parents=True, exist_ok=True)

def format_pval(pval):
    """Format a p-value as a string for the summary table.

    Values strictly below 0.001 are reported as "< 0.001"; every other value
    is rendered with three decimal places.
    """
    if pval < 0.001:
        return "< 0.001"
    return f"{pval:.3f}"

In [2]:
# Load the clinical dataframe with the groupings
df_petTOAD_pre = pd.read_csv(RES_DIR / "df_petTOAD.csv", index_col=0)

# Collapse the Fazekas-based grouping into a binary label: rows whose group
# name contains "no_WMH" become "no_WMH", all remaining rows become "WMH".
df_petTOAD_pre["WMH_bin"] = np.where(
    df_petTOAD_pre["Group_bin_Fazekas"].str.contains("no_WMH"), "no_WMH", "WMH"
)

# log10 of the WMH load. Entries equal to 0 are skipped by `where=` and keep
# the 0 they were initialised with (instead of becoming -inf).
# The computation runs on an explicit float ndarray: an `out` buffer created
# with zeros_like() on the raw column would inherit an integer dtype when the
# column holds integers, and log10 cannot write its float result into it.
wmh_load_values = df_petTOAD_pre["WMH_load_subj_space"].to_numpy(dtype=float)
wmh_log_values = np.zeros_like(wmh_load_values)
np.log10(wmh_load_values, out=wmh_log_values, where=(wmh_load_values != 0))
df_petTOAD_pre["wmh_log"] = wmh_log_values

# Work on a copy so that the freshly loaded frame stays available unchanged.
df_petTOAD = df_petTOAD_pre.copy()

In [75]:
def _quartiles(described, group):
    """Return the ("25%", "50%", "75%") entries of `group`'s row.

    `described` is the table produced by `groupby(...)[column].describe()`,
    which has one row per group and one column per summary statistic.
    """
    return tuple(described.at[group, pct] for pct in ("25%", "50%", "75%"))


# Group once and reuse; every statistic below is split by the binary WMH label.
by_wmh = df_petTOAD.groupby("WMH_bin")

# Group sizes, counted as the number of non-null PTID entries per group.
ptid_counts = by_wmh["PTID"].count()
num_wmh = ptid_counts["WMH"]
num_no_wmh = ptid_counts["no_WMH"]
num = num_wmh + num_no_wmh

# Per-group counts of each value of the "Sex" column (indexed by that value).
sex_counts = by_wmh["Sex"].value_counts()
wmh_seg_n = sex_counts["WMH"]
no_wmh_seg_n = sex_counts["no_WMH"]

# describe() is evaluated once per variable; the quartiles of both groups are
# then read from that single table.
age_desc = by_wmh["Age"].describe()
wmh_age_25, wmh_age_50, wmh_age_75 = _quartiles(age_desc, "WMH")
no_wmh_age_25, no_wmh_age_50, no_wmh_age_75 = _quartiles(age_desc, "no_WMH")

mmse_desc = by_wmh["MMSE"].describe()
wmh_mmse_25, wmh_mmse_50, wmh_mmse_75 = _quartiles(mmse_desc, "WMH")
no_wmh_mmse_25, no_wmh_mmse_50, no_wmh_mmse_75 = _quartiles(mmse_desc, "no_WMH")

edu_desc = by_wmh["PTEDUCAT"].describe()
wmh_edu_25, wmh_edu_50, wmh_edu_75 = _quartiles(edu_desc, "WMH")
no_wmh_edu_25, no_wmh_edu_50, no_wmh_edu_75 = _quartiles(edu_desc, "no_WMH")

wmh_log_desc = by_wmh["wmh_log"].describe()
wmh_wmh_log_25, wmh_wmh_log_50, wmh_wmh_log_75 = _quartiles(wmh_log_desc, "WMH")
no_wmh_wmh_log_25, no_wmh_wmh_log_50, no_wmh_wmh_log_75 = _quartiles(
    wmh_log_desc, "no_WMH"
)

In [76]:
def _mwu_pvalue(df, column):
    """Return the two-sided Mann-Whitney U p-value of `column`, no_WMH vs. WMH.

    Missing values are dropped per variable before testing, which matches the
    describe()-based quartiles (those skip missing values as well) and keeps a
    single missing entry from turning the p-value into NaN.
    `alternative` is passed explicitly so that the result does not depend on
    the default of the installed SciPy version.
    """
    no_wmh_values = df.loc[df["WMH_bin"] == "no_WMH", column].dropna()
    wmh_values = df.loc[df["WMH_bin"] == "WMH", column].dropna()
    return mannwhitneyu(no_wmh_values, wmh_values, alternative="two-sided").pvalue


pval_age = _mwu_pvalue(df_petTOAD, "Age")

# Sex is categorical: chi-square test on the group-by-sex contingency table.
# Element 1 of the chi2_contingency result is the p-value.
ct_sex = pd.crosstab(df_petTOAD["WMH_bin"], df_petTOAD["Sex"])
p_sex = stats.chi2_contingency(ct_sex)[1]

pval_mmse = _mwu_pvalue(df_petTOAD, "MMSE")
pval_edu = _mwu_pvalue(df_petTOAD, "PTEDUCAT")
pval_wmh = _mwu_pvalue(df_petTOAD, "wmh_log")

In [77]:
def _median_iqr(q25, q50, q75, fmt=""):
    """Render "median (Q1 - Q3)"; `fmt` is the format spec applied to each value."""
    return f"{q50:{fmt}} ({q25:{fmt}} - {q75:{fmt}})"


def _count_pct(count, total):
    """Render "count (pct%)" with the percentage of `total` rounded to 2 decimals."""
    return f"{count} ({round(count / total * 100, 2)}%)"


# One entry per table row; each list holds [WMH cell, no-WMH cell, p-value cell].
summary_data = {
    "Age": [
        _median_iqr(wmh_age_25, wmh_age_50, wmh_age_75),
        _median_iqr(no_wmh_age_25, no_wmh_age_50, no_wmh_age_75),
        format_pval(pval_age),
    ],
    # Header row for the sex breakdown. The p-value goes through format_pval
    # like every other row, so a very small value is shown as "< 0.001"
    # rather than "0.00".
    "Sex n (%)": ["", "", format_pval(p_sex)],
    "Women": [
        _count_pct(wmh_seg_n["F"], num_wmh),
        _count_pct(no_wmh_seg_n["F"], num_no_wmh),
        " ",
    ],
    "Men": [
        _count_pct(wmh_seg_n["M"], num_wmh),
        _count_pct(no_wmh_seg_n["M"], num_no_wmh),
        " ",
    ],
    "WMH vol. (log)": [
        _median_iqr(wmh_wmh_log_25, wmh_wmh_log_50, wmh_wmh_log_75, ".2f"),
        _median_iqr(no_wmh_wmh_log_25, no_wmh_wmh_log_50, no_wmh_wmh_log_75, ".2f"),
        format_pval(pval_wmh),
    ],
    "MMSE": [
        _median_iqr(wmh_mmse_25, wmh_mmse_50, wmh_mmse_75),
        _median_iqr(no_wmh_mmse_25, no_wmh_mmse_50, no_wmh_mmse_75),
        format_pval(pval_mmse),
    ],
    "Education (yrs.)": [
        _median_iqr(wmh_edu_25, wmh_edu_50, wmh_edu_75),
        _median_iqr(no_wmh_edu_25, no_wmh_edu_50, no_wmh_edu_75),
        format_pval(pval_edu),
    ],
}

# Create the summary DataFrame (one column per variable at this point)
df_summary = pd.DataFrame(data=summary_data)

# Transpose so that each variable becomes a row, then label the three columns
# with the group names (including group sizes) and the p-value.
df_table1 = df_summary.T
df_table1.columns = [
    f"WMH (n = {num_wmh})",
    f"no WMH (n = {num_no_wmh})",
    "p",
]

In [78]:
# Write Table 1 as a CSV file into the tables directory.
table_1_path = TBL_DIR / "table_1.csv"
df_table1.to_csv(table_1_path)