In [6]:
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu, norm

# -------------------------------
# 1. Load CSV
# -------------------------------
csv_path = r"C:\Users\Abigail Crowther\OneDrive - The University of Manchester\Year 4\MPhys Projects\Semester 1 - Modelling Facial Deformation\Documents\surface_distance_metrics_rhabdo_no_outliers.csv"  # <-- change to your CSV path
data = pd.read_csv(csv_path)

# -------------------------------
# 2. Clean/Map Columns
# -------------------------------
data["gender"] = data["Gender"].map({"Female":"F", "Male":"M"})
data["tube"]   = data["Breathing Tube"].map({"Yes":"Tube", "No":"NoTube"})
data["age"]    = data["Age"]

# Map metrics
data["asd_gt_pred"]  = data["Average Surface Distance: GT->Pred (mm)"]
data["asd_pred_gt"]  = data["Average Surface Distance: Pred->GT (mm)"]
data["hd100"]        = data["Robust Hausdorff (100%) (mm)"]
data["hd95"]         = data["Robust Hausdorff (95%) (mm)"]
data["overlap_gt"]   = data["Surface Overlap at 1mm (GT)"]
data["overlap_pred"] = data["Surface Overlap at 1mm (Pred)"]

# -------------------------------
# 3. Helper functions
# -------------------------------

def cohen_d(a, b):
    """Compute Cohen's d for two independent samples."""
    n1, n2 = len(a), len(b)
    s1, s2 = np.var(a, ddof=1), np.var(b, ddof=1)
    pooled_std = np.sqrt(((n1-1)*s1 + (n2-1)*s2)/(n1+n2-2))
    return (np.mean(a) - np.mean(b)) / pooled_std

def bootstrap_ci(a, b, n_boot=2000):
    """Bootstrap 95% CI for Cohen's d."""
    boot_ds = []
    n_a, n_b = len(a), len(b)
    for _ in range(n_boot):
        a_s = np.random.choice(a, n_a, replace=True)
        b_s = np.random.choice(b, n_b, replace=True)
        boot_ds.append(cohen_d(a_s, b_s))
    return np.percentile(boot_ds, [2.5, 97.5])

def required_n_for_power(d, alpha=0.05, power=0.8):
    """Approximate sample size per group needed for two-sample t-test."""
    if abs(d) < 1e-6:
        return "Effect ≈ 0 → infinite N"
    z_alpha = norm.ppf(1 - alpha/2)
    z_beta = norm.ppf(power)
    n = 2 * ((z_alpha + z_beta)/d)**2
    return int(np.ceil(n))

def mann_whitney_report(metric, groupA, groupB, labelA, labelB):
    a = groupA[metric].dropna().values
    b = groupB[metric].dropna().values
    
    # Mann–Whitney U test
    U, p = mannwhitneyu(a, b, alternative="two-sided")
    
    # Cohen's d
    d = cohen_d(a, b)
    
    # 95% bootstrap CI
    ci_low, ci_high = bootstrap_ci(a, b)
    
    # Required N for 80% power
    req_n = required_n_for_power(d)
    
    return {
        "metric": metric,
        "comparison": f"{labelA} vs {labelB}",
        "U_statistic": U,
        "p_value": p,
        "cohen_d": d,
        "d_CI95_low": ci_low,
        "d_CI95_high": ci_high,
        "required_n_each_group_for_80%_power": req_n
    }

# -------------------------------
# 4. Run comparisons for all metrics
# -------------------------------
metrics = ["asd_gt_pred","asd_pred_gt","hd100","hd95","overlap_gt","overlap_pred"]
results = []

# Gender comparison
groupF = data[data.gender=="F"]
groupM = data[data.gender=="M"]
for m in metrics:
    results.append(mann_whitney_report(m, groupF, groupM, "Female", "Male"))

# Tube comparison
groupTube = data[data.tube=="Tube"]
groupNoTube = data[data.tube=="NoTube"]
for m in metrics:
    results.append(mann_whitney_report(m, groupTube, groupNoTube, "Tube", "No Tube"))

# -------------------------------
# 5. Create summary table
# -------------------------------
stats_df = pd.DataFrame(results)
stats_df.to_csv("statistical_summary_raw_no_dependencies.csv", index=False)
stats_df


  return (np.mean(a) - np.mean(b)) / pooled_std


Unnamed: 0,metric,comparison,U_statistic,p_value,cohen_d,d_CI95_low,d_CI95_high,required_n_each_group_for_80%_power
0,asd_gt_pred,Female vs Male,24.0,0.072727,1.314791,0.472116,2.927655,10
1,asd_pred_gt,Female vs Male,15.0,0.927273,0.444947,-0.875463,2.260979,80
2,hd100,Female vs Male,14.0,1.0,0.323964,-1.389247,1.063218,150
3,hd95,Female vs Male,15.0,0.924548,0.18405,-1.014458,1.541903,464
4,overlap_gt,Female vs Male,13.5,1.0,-0.205061,-1.057942,1.631192,374
5,overlap_pred,Female vs Male,16.0,0.776319,-0.025269,-0.959234,1.782,24585
6,asd_gt_pred,Tube vs No Tube,14.0,0.761905,0.077416,-1.09511,1.842936,2620
7,asd_pred_gt,Tube vs No Tube,10.0,0.761905,-0.270405,-1.799845,1.085953,215
8,hd100,Tube vs No Tube,7.0,0.352381,-0.707087,-1.74574,0.141192,32
9,hd95,Tube vs No Tube,12.5,1.0,0.186062,-1.23198,2.041282,454


In [None]:
import pandas as pd
import numpy as np
from scipy.stats import spearmanr, norm

csv_path = r"C:\Users\Abigail Crowther\OneDrive - The University of Manchester\Year 4\MPhys Projects\Semester 1 - Modelling Facial Deformation\Documents\surface_distance_metrics_rhabdo_no_outliers.csv"
data = pd.read_csv(csv_path)

metrics = {
    "Average Surface Distance: GT->Pred (mm)": "Average Surface Distance: GT->Pred (mm)",
    "Average Surface Distance: Pred->GT (mm)": "Average Surface Distance: Pred->GT (mm)",
    "Robust Hausdorff (100%) (mm)": "Robust Hausdorff (100%) (mm)",
    "Robust Hausdorff (95%) (mm)": "Robust Hausdorff (95%) (mm)",
    "Surface Overlap at 1mm (GT)": "Surface Overlap at 1mm (GT)",
    "Surface Overlap at 1mm (Pred)": "Surface Overlap at 1mm (Pred)"
}

def required_n_for_correlation(r, alpha=0.05, power=0.8):
    if abs(r) < 1e-6:
        return "Effect ≈ 0 → infinite N"
    z_alpha = norm.ppf(1 - alpha/2)
    z_beta = norm.ppf(power)
    n = ((z_alpha + z_beta) / np.arctanh(abs(r)))**2 + 3
    return int(np.ceil(n))

results = []

for label, col in metrics.items():
    valid = data[[col, "Age"]].dropna()
    rho, p = spearmanr(valid["Age"], valid[col])
    
    n_req = required_n_for_correlation(rho)
    
    results.append({
        "Metric": label,
        "Spearman_rho": rho,
        "p_value": p,
        "N_required_for_80%_power": n_req
    })

age_corr_df = pd.DataFrame(results)
age_corr_df


Unnamed: 0,Metric,Spearman_rho,p_value,N_required_for_80%_power
0,Average Surface Distance: GT->Pred (mm),-0.281133,0.40234,98
1,Average Surface Distance: Pred->GT (mm),0.479308,0.135765,32
2,Robust Hausdorff (100%) (mm),0.350264,0.29096,62
3,Robust Hausdorff (95%) (mm),0.337205,0.310539,67
4,Surface Overlap at 1mm (GT),-0.250011,0.458424,124
5,Surface Overlap at 1mm (Pred),-0.508117,0.110533,29
