In [1]:
import os
import re
import glob
from collections import defaultdict

SRC_DIR = "/SSD5_8TB/Daniel/14_fastddpm/Fast-DDPM/data/SynthradPelvis_test/MR"

# 파일명 패턴: 1PA001_slice000.npy
pat = re.compile(r"^(?P<pid>.+)_slice(?P<idx>\d+)\.npy$")

# 1) 파일 목록 수집
files = glob.glob(os.path.join(SRC_DIR, "*.npy"))
print("Total npy files:", len(files))

# 2) 환자별 slice index 모으기
patient_to_indices = defaultdict(set)

skipped = 0
for fp in files:
    base = os.path.basename(fp)
    m = pat.match(base)
    if not m:
        skipped += 1
        continue
    pid = m.group("pid")
    idx = int(m.group("idx"))
    patient_to_indices[pid].add(idx)

print("Patients found:", len(patient_to_indices))
print("Skipped (pattern mismatch):", skipped)

# 3) 환자별 slice 개수 계산
patient_to_count = {pid: len(idxs) for pid, idxs in patient_to_indices.items()}

# 4) max / min 찾기
max_pid = max(patient_to_count, key=patient_to_count.get)
min_pid = min(patient_to_count, key=patient_to_count.get)

max_count = patient_to_count[max_pid]
min_count = patient_to_count[min_pid]

print("\n=== RESULT ===")
print(f"MAX slices patient: {max_pid}  | slices: {max_count}")
print(f"MIN slices patient: {min_pid}  | slices: {min_count}")

# 5) 동률(여러명)도 보고 싶으면
max_pids = [pid for pid, c in patient_to_count.items() if c == max_count]
min_pids = [pid for pid, c in patient_to_count.items() if c == min_count]

print("\nTies (if any)")
print(f"MAX count ({max_count}) patients:", max_pids)
print(f"MIN count ({min_count}) patients:", min_pids)

# 6) 간단 분포 요약
counts_sorted = sorted(patient_to_count.values())
if counts_sorted:
    import numpy as np
    arr = np.array(counts_sorted)
    print("\n=== SUMMARY ===")
    print("Patients:", len(arr))
    print("Mean:", float(arr.mean()))
    print("Median:", float(np.median(arr)))
    print("Std:", float(arr.std()))
    print("Min:", int(arr.min()))
    print("Max:", int(arr.max()))
    print("Percentiles (5/25/50/75/95):", [float(x) for x in np.percentile(arr, [5,25,50,75,95])])

# 7) (선택) slice index가 연속인지(누락) 체크
#    예: 0..N-1 형태인지 확인
holes = []
for pid, idxs in patient_to_indices.items():
    if not idxs:
        continue
    mn, mx = min(idxs), max(idxs)
    expected = set(range(mn, mx + 1))
    missing = sorted(expected - idxs)
    if missing:
        holes.append((pid, len(missing), mn, mx, missing[:10]))  # missing 일부만

holes = sorted(holes, key=lambda x: x[1], reverse=True)
print("\n=== Missing slice indices check ===")
print("Patients with missing indices:", len(holes))
if holes:
    print("Top 10 with most missing:")
    for pid, nmiss, mn, mx, ex in holes[:10]:
        print(f"- {pid}: missing {nmiss} indices in range [{mn},{mx}] | examples: {ex}")


Total npy files: 2971
Patients found: 34
Skipped (pattern mismatch): 0

=== RESULT ===
MAX slices patient: 1PA111  | slices: 121
MIN slices patient: 1PC063  | slices: 55

Ties (if any)
MAX count (121) patients: ['1PA111']
MIN count (55) patients: ['1PC063', '1PC044', '1PC057']

=== SUMMARY ===
Patients: 34
Mean: 87.38235294117646
Median: 88.5
Std: 23.60086576911752
Min: 55
Max: 121
Percentiles (5/25/50/75/95): [55.0, 64.75, 88.5, 114.75, 117.69999999999999]

=== Missing slice indices check ===
Patients with missing indices: 0
