In [None]:
# G. Hayes 2025
# This script computes the t-tests and anovas regarding the PETCO2 ranges between modalities, and tests for the influence(s) of age and sex, for the analysis presented in:
# G. Hayes, S. Sparks, J. Pinto, and D. P. Bulte, "Models of Cerebrovascular Reactivity in BOLD-fMRI and Transcranial Doppler Ultrasound"

# Update this script for your purposes, notably:
# - the data file name
# - the output file name
# - alter parameters that may differ for your data

In [None]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import statsmodels.api as sm
from scipy.stats import shapiro, norm, ttest_rel
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.stattools import durbin_watson

# PETCO2 baseline and change values between TCD and MRI stats

In [None]:
# Read the PETCO2 table from CSV
df = pd.read_csv('path/to/data.csv')

# Pull each PETCO2 measure out as a NumPy array of per-subject values.
# Expected columns: "sub", "tcdpetco2_base", "tcdpetco2_diff", "mrpetco2_base", "mrpetco2_diff"
tcd_baseline, tcd_change, mri_baseline, mri_change = (
    df[column].to_numpy()
    for column in ("tcdpetco2_base", "tcdpetco2_diff", "mrpetco2_base", "mrpetco2_diff")
)

# Paired (dependent-samples) t-tests comparing TCD against MRI:
# one for the baseline PETCO2 and one for the PETCO2 change.
stat_baseline, pval_baseline = ttest_rel(tcd_baseline, mri_baseline)
stat_change, pval_change = ttest_rel(tcd_change, mri_change)

# Report each test as a heading line followed by its statistic and p-value
print(
    "Paired t-test for baseline PETCO2 (TCD vs. MRI):",
    f"  t-statistic: {stat_baseline:.3f}, p-value: {pval_baseline:.3g}",
    sep="\n",
)

print(
    "\nPaired t-test for change in PETCO2 (TCD vs. MRI):",
    f"  t-statistic: {stat_change:.3f}, p-value: {pval_change:.3g}",
    sep="\n",
)

Paired t-test for baseline PETCO2 (TCD vs. MRI):
  t-statistic: -6.299, p-value: 3.78e-06

Paired t-test for change in PETCO2 (TCD vs. MRI):
  t-statistic: 0.175, p-value: 0.863


# Age and sex confound stats

In [10]:
# Load the table holding the model-parameter, age and sex columns used by the
# age/sex confound analysis.
# NOTE: this is a placeholder path (same convention as the other data/output
# paths in this notebook) -- point it at your own copy of the file. A
# machine-specific absolute path would only resolve on the original author's computer.
df2 = pd.read_csv('path/to/mri_tcd_forstats.csv')

In [None]:
# Define your parameters of interest (each is used as the dependent variable
# of its own regression on age and sex)
parameters = [
    "tcdslope_lin", "tcdintercept_lin", "tcda_opt_4p", "tcdb_opt_4p",
    "tcdc_opt_4p", "tcdd_opt_4p", "tcdb_2p_opt", "tcdc_2p_opt",
    "mrslope_lin", "mrintercept_lin", "mra_opt_4p", "mrb_opt_4p",
    "mrc_opt_4p", "mrd_opt_4p", "mrb_2p_opt", "mrc_2p_opt"
]

# Significance level of the Shapiro-Wilk residual-normality check that decides
# whether the OLS fit is kept or replaced by a robust fit.
SHAPIRO_ALPHA = 0.05

# Encode sex numerically (M -> 0, F -> 1).
# The guard makes this cell safe to re-run: mapping an already-encoded column a
# second time would turn every value into NaN, because 0 and 1 are not keys of
# the mapping.
if not pd.api.types.is_numeric_dtype(df2["sex"]):
    df2["sex"] = df2["sex"].map({"M": 0, "F": 1})


def _wald_p_value(fit, term):
    """Two-sided Wald z-test p-value for the coefficient `term` of a fitted model.

    Parameters
    ----------
    fit : fitted results object exposing `params` and `cov_params()` with
        labelled (pandas) rows/columns.
    term : str
        Name of the coefficient to test.

    Returns
    -------
    float or None
        The p-value, or None when `term` is absent from the covariance matrix
        or its variance is not strictly positive.
    """
    cov = fit.cov_params()
    if term not in cov.index or term not in cov.columns:
        return None
    variance = cov.loc[term, term]
    if not variance > 0:
        return None
    z_stat = fit.params[term] / np.sqrt(variance)
    # Use the survival function rather than 1 - cdf: the subtraction cancels to
    # exactly 0.0 for large |z|, whereas sf keeps the small tail probability.
    return 2 * norm.sf(abs(z_stat))


# Create a list to store results (one dictionary per parameter)
results_list = []

for param in parameters:
    formula = f"{param} ~ age + C(sex)"

    # Fit an OLS model initially
    ols_model = smf.ols(formula=formula, data=df2).fit()
    residuals = ols_model.resid

    # Check normality of the OLS residuals
    stat_shapiro, p_shapiro = shapiro(residuals)

    if p_shapiro < SHAPIRO_ALPHA:
        # Residuals deviate from normality: refit with a robust linear model
        model_used = "RLM"

        # Remove rows with missing values in any variable of the model
        df_temp = df2.dropna(subset=["age", "sex", param])

        # Setup endog (y) and exog (X); the constant column provides the intercept
        y = df_temp[param].astype(float)
        X = sm.add_constant(df_temp[["age", "sex"]].astype(float), prepend=True)

        # Fit a robust linear model using Huber's T norm
        final_model = sm.RLM(y, X, M=sm.robust.norms.HuberT()).fit()

        # No R-squared is taken from the robust fit, and the Breusch-Pagan /
        # Durbin-Watson diagnostics are only reported for the OLS case.
        bp_stat, bp_pval, dw_stat = None, None, None
        r_sq = None

        # Coefficients are labelled by the exog columns: const, age, sex
        coef_age = final_model.params.get("age", None)
        coef_sex = final_model.params.get("sex", None)

        # Approximate p-values using a Wald-type z-test on the var-cov matrix
        p_age = _wald_p_value(final_model, "age")
        p_sex = _wald_p_value(final_model, "sex")

    else:
        model_used = "OLS"
        final_model = ols_model

        # Homoscedasticity (Breusch-Pagan) + Durbin-Watson on the OLS residuals
        bp_test = het_breuschpagan(final_model.resid, ols_model.model.exog)
        bp_stat, bp_pval = bp_test[0], bp_test[1]
        dw_stat = durbin_watson(final_model.resid)

        r_sq = final_model.rsquared
        coef_age = final_model.params.get("age", None)
        p_age = final_model.pvalues.get("age", None)

        # The sex term is labelled by the formula machinery, e.g. "C(sex)[T.1]"
        # or "C(sex)[T.F]", so look it up by prefix and take the first match.
        sex_key = next(
            (name for name in final_model.params.index if "C(sex)[" in name),
            None,
        )
        coef_sex = final_model.params.get(sex_key, None) if sex_key else None
        p_sex = final_model.pvalues.get(sex_key, None) if sex_key else None

    results_list.append({
        "parameter": param,
        "model_used": model_used,
        "shapiro_p": p_shapiro,
        "age_coef": coef_age,
        "age_pval": p_age,
        "sex_coef": coef_sex,
        "sex_pval": p_sex,
        "r_squared": r_sq,
        "bp_stat": bp_stat,
        "bp_pval": bp_pval,
        "dw_stat": dw_stat
    })

# Convert the list of dictionaries to a DataFrame
results_df = pd.DataFrame(results_list)

# Save the results to a CSV file
results_df.to_csv('path/to/output.csv', index=False)