In [21]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import cv2

# 路径定义
# data_dir = './videos/MRI515_T2'
# data_dir = './videos/MRI515_T1'
# data_dir = './videos/AFL_MRI'
# # data_dir = './videos/bed_rest_MRI/dino_order_1st'

data_dir = './videos/bed_rest_MRI/bed_rest_MRI_L5low/1核'

label_dir = os.path.join(data_dir,'label_in_png_renamed')

# prompt_dir = 'manual_prompt_frame0'
# # prompt_dir ='manual_prompt_frame0_1'
prompt_dir ='manual_prompt_frame0_1_2'

auto_seg_dirs = { 
    "nolap": os.path.join(data_dir, prompt_dir, 'SAM2_seg_mask_nolap'),
    "yeslap": os.path.join(data_dir, prompt_dir, 'SAM2_seg_mask_yeslap')
}
csv_write_path = os.path.join(data_dir, prompt_dir, f"{prompt_dir}_dsc_overlap.csv")

# 确保目录存在
if not all(os.path.exists(d) for d in [label_dir] + list(auto_seg_dirs.values())):
    raise FileNotFoundError("Missing required directories!")

# 计算 DSC 和 Overlap
def calculate_metric(mask1, mask2, metric="dsc"):
    intersection = np.sum((mask1 > 0) & (mask2 > 0))
    total_area = np.sum(mask1 > 0) + np.sum(mask2 > 0)
    return (2 * intersection / total_area) if metric == "dsc" else (intersection / total_area) if total_area > 0 else 0.0

# 类别定义
class_ranges = {1: (30, 70), 2: (80, 120), 3: (130, 170), 4: (180, 220)}
# class_ranges = {1: (30, 70), 2: (80, 120)}

# 处理所有文件
results = []
for label_file in os.listdir(label_dir):
    label_index = int(label_file.split('.')[0])
    label_mask = np.array(Image.open(os.path.join(label_dir, label_file)))

    # 读取 segmentation masks（如果缺失则跳过）
    masks = {k: {} for k in auto_seg_dirs}
    missing_files = False
    for k, v in auto_seg_dirs.items():
        for c in class_ranges:
            mask_path = os.path.join(v, f"frame_{label_index}_obj_{c}.png")
            if os.path.exists(mask_path):
                masks[k][c] = np.array(Image.open(mask_path))
            else:
                print(f"Missing file: {mask_path}")
                missing_files = True
                break  # 发现缺失文件，跳过该 `label_file`
    if missing_files:
        continue

    file_results = {"file_name": label_file}
    
    for c, (low, high) in class_ranges.items():
        label_bin = ((low <= label_mask) & (label_mask <= high)).astype(np.uint8)  # 转换为 uint8

        # 处理 nolap 和 yeslap segmentation masks
        for seg_type in ["nolap", "yeslap"]:
            mask_bin = ((low <= masks[seg_type][c]) & (masks[seg_type][c] <= high)).astype(np.uint8)
            masks[seg_type][c] = cv2.resize(mask_bin, label_bin.shape[::-1], interpolation=cv2.INTER_NEAREST)

        file_results.update({
            f"class{c}_dsc": calculate_metric(label_bin, masks["nolap"][c], "dsc"),
            f"class{c}_label_area": np.sum(label_bin),
            f"class{c}_auto_area": np.sum(masks["nolap"][c])
        })

    # 计算 Overlap Ratio（使用 yeslap）
    file_results["overlap_ratio"] = calculate_metric(masks["yeslap"][1], masks["yeslap"][2], "overlap")

    results.append(file_results)
#     print(f"Processed {label_file}: {file_results}")

# 保存 CSV
pd.DataFrame(results).to_csv(csv_write_path, index=False)


  return (2 * intersection / total_area) if metric == "dsc" else (intersection / total_area) if total_area > 0 else 0.0


In [22]:
results_df = pd.DataFrame(results)
results_df.head()

Unnamed: 0,file_name,class1_dsc,class1_label_area,class1_auto_area,class2_dsc,class2_label_area,class2_auto_area,class3_dsc,class3_label_area,class3_auto_area,class4_dsc,class4_label_area,class4_auto_area,overlap_ratio
0,00071.png,0.360887,813,179,0.687211,488,810,0.941694,471,438,0.591837,621,261,0.004028
1,00049.png,0.787234,87,101,0.943898,897,832,0.950532,818,779,0.846847,124,98,0.007447
2,00037.png,0.828402,283,224,0.934637,803,834,0.907053,806,711,0.864865,160,210,0.039055
3,00038.png,0.826087,168,154,0.96056,766,806,0.947812,953,944,0.829787,173,203,0.027356
4,00028.png,0.861972,158,197,0.952813,552,550,0.785448,636,436,0.87395,173,184,0.0


In [23]:
import pandas as pd

# 假设 results_df 已经被定义
# 定义需要移除的 frame indices，并格式化为 5 位数字
frame_idx = [0]
# frame_idx = [0, 1]

frame_idx = [0, 1,2]

frame_idx_patterns = [f"{str(idx).zfill(5)}" for idx in frame_idx]  # ['frame00000', 'frame00001', 'frame00002']

# 构造正则表达式（匹配任何一个 frameXXXXX）
regex_pattern = "|".join(frame_idx_patterns)

# 过滤掉包含这些 frameXXXXX 的行
filtered_results_df = results_df[~results_df['file_name'].str.contains(regex_pattern, regex=True)].copy()

# 显示过滤后的 DataFrame 形状
print("Filtered DataFrame shape:", filtered_results_df.shape)
print(results_df.shape)
print(filtered_results_df.shape)

# 预览前几行数据
filtered_results_df.head()


Filtered DataFrame shape: (69, 14)
(72, 14)
(69, 14)


Unnamed: 0,file_name,class1_dsc,class1_label_area,class1_auto_area,class2_dsc,class2_label_area,class2_auto_area,class3_dsc,class3_label_area,class3_auto_area,class4_dsc,class4_label_area,class4_auto_area,overlap_ratio
0,00071.png,0.360887,813,179,0.687211,488,810,0.941694,471,438,0.591837,621,261,0.004028
1,00049.png,0.787234,87,101,0.943898,897,832,0.950532,818,779,0.846847,124,98,0.007447
2,00037.png,0.828402,283,224,0.934637,803,834,0.907053,806,711,0.864865,160,210,0.039055
3,00038.png,0.826087,168,154,0.96056,766,806,0.947812,953,944,0.829787,173,203,0.027356
4,00028.png,0.861972,158,197,0.952813,552,550,0.785448,636,436,0.87395,173,184,0.0


In [24]:
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, ttest_ind, ks_2samp, shapiro

# 计算 4 类的 DSC 统计信息
dsc_stats = {}
for i in range(1, 5):
    dsc_values = filtered_results_df[f'class{i}_dsc'].dropna().to_numpy()
    
    if len(dsc_values) > 0:
        dsc_stats[f'class{i}'] = {
            'count': len(dsc_values),
            'max': np.max(dsc_values),
            'min': np.min(dsc_values),
            'mean': np.mean(dsc_values),
            'std': np.std(dsc_values)
        }

# 输出 DSC 统计信息
print("\nDSC Statistics: on len", len(dsc_values)  )
for class_name, stats in dsc_stats.items():
    print(f"{class_name}: Mean={stats['mean']:.3f}, Std={stats['std']:.3f}, Min={stats['min']:.3f}, Max={stats['max']:.6f}")

# 统计 Label Area 和 Auto Area 差异
print("\nComparing Label and Auto-Seg Areas...")

for i in range(1, 5):
    label_areas = filtered_results_df[f'class{i}_label_area'].dropna().to_numpy()
    auto_seg_areas = filtered_results_df[f'class{i}_auto_area'].dropna().to_numpy()

    if len(label_areas) == 0 or len(auto_seg_areas) == 0:
        print(f"Skipping class {i} due to insufficient data.")
        continue

    # 正态性检验
    shapiro_label = shapiro(label_areas)
    shapiro_auto_seg = shapiro(auto_seg_areas)

    print(f"\nClass {i} - Normality Test (Shapiro-Wilk):")
    print(f"Label Areas: W={shapiro_label.statistic:.4f}, p={shapiro_label.pvalue:.4e}")
    print(f"Auto-Seg Areas: W={shapiro_auto_seg.statistic:.4f}, p={shapiro_auto_seg.pvalue:.4e}")

    label_is_normal = shapiro_label.pvalue > 0.05
    auto_seg_is_normal = shapiro_auto_seg.pvalue > 0.05

    # 选择合适的统计检验方法
    if label_is_normal and auto_seg_is_normal:
        t_stat, p_value = ttest_ind(label_areas, auto_seg_areas, equal_var=False)
        print(f"Method: Independent Two-Sample T-Test")
        print(f"T-statistic: {t_stat:.4f}, P-value: {p_value:.3f}")
    else:
        ks_stat, p_value = ks_2samp(label_areas, auto_seg_areas)
        print(f"Method: Kolmogorov-Smirnov Test")
        print(f"KS-statistic: {ks_stat:.4f}, P-value: {p_value:.3f}")

    # 解释检验结果
    if p_value < 0.05:
        print("  - Significant difference detected (p < 0.05).")
    else:
        print("  - No significant difference detected (p >= 0.05).")



DSC Statistics: on len 68
class1: Mean=0.567, Std=0.376, Min=0.000, Max=0.938856
class2: Mean=0.923, Std=0.043, Min=0.687, Max=0.970509
class3: Mean=0.924, Std=0.039, Min=0.785, Max=0.982578
class4: Mean=0.622, Std=0.345, Min=0.000, Max=0.931646

Comparing Label and Auto-Seg Areas...

Class 1 - Normality Test (Shapiro-Wilk):
Label Areas: W=0.7890, p=1.4674e-08
Auto-Seg Areas: W=0.7424, p=1.1213e-09
Method: Kolmogorov-Smirnov Test
KS-statistic: 0.2319, P-value: 0.049
  - Significant difference detected (p < 0.05).

Class 2 - Normality Test (Shapiro-Wilk):
Label Areas: W=0.9841, p=5.3004e-01
Auto-Seg Areas: W=0.9868, p=6.8336e-01
Method: Independent Two-Sample T-Test
T-statistic: -1.7951, P-value: 0.075
  - No significant difference detected (p >= 0.05).

Class 3 - Normality Test (Shapiro-Wilk):
Label Areas: W=0.9873, p=7.1140e-01
Auto-Seg Areas: W=0.9866, p=6.7366e-01
Method: Independent Two-Sample T-Test
T-statistic: 0.5407, P-value: 0.590
  - No significant difference detected (p >= 

In [25]:
DSC Statistics:
class1: Mean=0.871333, Std=0.127612, Min=0.290713, Max=0.973578
class2: Mean=0.890994, Std=0.089284, Min=0.479254, Max=0.971504
class3: Mean=0.907558, Std=0.050823, Min=0.769513, Max=0.968762
class4: Mean=0.898424, Std=0.078700, Min=0.622362, Max=0.969160
    
0.781  0.133  0.076  0.466

SyntaxError: invalid syntax. Perhaps you forgot a comma? (2183385047.py, line 1)