In [2]:
# Compare max vessel diameter between Pre and Post exposure scans, separately for each location
import data_read
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu

# Input folders handed to the data_read helpers below.
vivosight_folder = "data/Vivosight_data/"
scan_folder = "data/scan_information/"

# Column-oriented accumulator: one list per output column. Every blood-flow
# scan appends exactly one value to each list, so the lists stay aligned.
subject_ids = data_read.get_all_subject_ids(vivosight_folder)
data = {
    name: []
    for name in (
        "Subject",
        "Scan ID",
        "Location ID",
        "Exposure",
        "Max Vessel Diameter",
    )
}

# Walk every subject's first and return visit and flatten their blood-flow
# scans into the accumulator.
for subject_id in subject_ids:
    subject = data_read.process_subject_vivosight_data(
        subject_id, vivosight_folder, scan_folder, "bloodflow"
    )

    for visit in (subject.visit, subject.return_visit):
        # Skip a missing visit or one without any blood-flow scans.
        if not (visit and visit.blood_flow):
            continue

        for blood_data in visit.blood_flow:
            # Called before max_diameter is read; presumably this is what
            # populates that attribute -- confirm against data_types.BloodFlow.
            blood_data.get_max_diameter()

            record = {
                "Subject": subject_id,
                "Scan ID": blood_data.scan_id,
                "Location ID": blood_data.location,
                "Exposure": "Post" if blood_data.exposed else "Pre",
                "Max Vessel Diameter": blood_data.max_diameter,
            }
            for column, value in record.items():
                data[column].append(value)

# One row per blood-flow scan.
df = pd.DataFrame(data)

# Tukey fences (1.5 * IQR beyond the quartiles). The quartiles are taken over
# the whole "Max Vessel Diameter" column, i.e. pooled across all locations
# and both exposure groups.
diameters = df["Max Vessel Diameter"]
Q1 = diameters.quantile(0.25)
Q3 = diameters.quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Keep rows whose diameter lies within the fences; both bounds are inclusive.
df_clean = df[diameters.between(lower_bound, upper_bound)]

# Two-sided Mann-Whitney U test of Pre vs Post diameters, run separately for
# each of locations 1-4 on the outlier-filtered data. A location is only
# tested when both groups have at least 3 observations; p < 0.05 is reported
# as significant.
print("Mann-Whitney U Test: Pre vs Post by Location\n")

for loc in (1, 2, 3, 4):
    at_location = df_clean[df_clean["Location ID"] == loc]
    pre = at_location.loc[at_location["Exposure"] == "Pre", "Max Vessel Diameter"]
    post = at_location.loc[at_location["Exposure"] == "Post", "Max Vessel Diameter"]

    print(f" Location {loc}:")
    print(f"   Pre (n={len(pre)}), Post (n={len(post)})")

    # Too few observations in either group: report it and move on.
    if len(pre) < 3 or len(post) < 3:
        print("    Not enough data to compare\n")
        continue

    stat, p = mannwhitneyu(pre, post, alternative='two-sided')
    if p < 0.05:
        sig = " Significant"
    else:
        sig = "Not significant"
    print(f"   Mann-Whitney U p-value = {p:.4f} → {sig}\n")

data/Vivosight_data/A52_Results [<data_types.BloodFlow object at 0x16b3c1700>, <data_types.BloodFlow object at 0x16b3c0da0>, <data_types.BloodFlow object at 0x16b1d67b0>, <data_types.BloodFlow object at 0x16b4d0230>, <data_types.BloodFlow object at 0x16b4d3710>, <data_types.BloodFlow object at 0x16b4d3b60>, <data_types.BloodFlow object at 0x16b4d3110>, <data_types.BloodFlow object at 0x16b4d2e40>, <data_types.BloodFlow object at 0x16b4d2c60>, <data_types.BloodFlow object at 0x16b4d2330>, <data_types.BloodFlow object at 0x16b4d1d30>, <data_types.BloodFlow object at 0x16b4d16a0>, <data_types.BloodFlow object at 0x16b4d1130>, <data_types.BloodFlow object at 0x16b4d1160>, <data_types.BloodFlow object at 0x16b3d5280>, <data_types.BloodFlow object at 0x16b2983b0>, <data_types.BloodFlow object at 0x16b4d08c0>, <data_types.BloodFlow object at 0x16b2000e0>, <data_types.BloodFlow object at 0x16b29ab10>, <data_types.BloodFlow object at 0x16b203f20>, <data_types.BloodFlow object at 0x16b4d57f0>, <