# Glu participant list comparison
Load two Excel files, extract participant IDs, and verify exact correspondence.

In [2]:
import pandas as pd
import re
from typing import Optional

# First workbook: set its path, then load it right away.
file_t1 = r"C:\Users\okkam\Desktop\labo\article 2\Longitudinal_Multimodal_Data_CIMAQ\Glu\T1_subset_35.xlsx"
df_t1 = pd.read_excel(file_t1)

# Second workbook (T2/T3), handled the same way.
file_t2_t3 = r"C:\Users\okkam\Desktop\labo\article 2\Longitudinal_Multimodal_Data_CIMAQ\Glu\T2_T3_Glu_35.xlsx"
df_t2_t3 = pd.read_excel(file_t2_t3)

def find_participant_column(df: pd.DataFrame) -> Optional[str]:
    """Return the label of the column most likely to hold participant IDs.

    Matching is case-insensitive and ignores whitespace around the header.
    Exact matches against a list of preferred names are tried first, in
    priority order. If none matches, a substring search is used, trying the
    more specific keywords before the very generic ``"id"`` so that a header
    such as ``"Valid"`` cannot shadow a header such as ``"subject_code"``.

    Args:
        df: Table whose column labels are inspected. Non-string labels
            (for example integer headers) are ignored instead of raising.

    Returns:
        The original, unmodified column label (so it can be used to index
        ``df``), or ``None`` when no label matches.
    """
    preferred = (
        "PSCID",
        "id_participant",
        "Participant_ID",
        "participant_id",
        "participant",
        "ID",
    )
    # Ordered from most to least specific; "id" is last because it is a
    # substring of many unrelated words.
    fallback_keys = ("participant", "pscid", "id_participant", "subject", "id")

    # (normalised header, original label) pairs in column order. Only string
    # labels can match a name, so anything else is skipped.
    labelled = [
        (label.strip().lower(), label)
        for label in df.columns
        if isinstance(label, str)
    ]
    by_header = dict(labelled)

    for name in preferred:
        match = by_header.get(name.lower())
        if match is not None:
            return match

    # Fallback: first column (in column order) containing the most specific
    # keyword that occurs anywhere in the headers.
    for key in fallback_keys:
        for header, label in labelled:
            if key in header:
                return label
    return None

def normalize_base_id(value: object) -> str:
    """Reduce a participant identifier to its base ID.

    The value is converted to text and stripped, everything from the first
    underscore onward is discarded, and the first run of digits in what
    remains is returned (e.g. '3025432_V31S' -> '3025432'). If the remaining
    text contains no digits, it is returned as is.
    """
    prefix, _, _ = str(value).strip().partition("_")
    first_digits = re.search(r"\d+", prefix)
    if first_digits is None:
        return prefix
    return first_digits.group()

# Detect the participant column in each table (T1 first, then T2/T3).
col_t1, col_t2_t3 = (
    find_participant_column(frame) for frame in (df_t1, df_t2_t3)
)

if col_t1 is not None and col_t2_t3 is not None:
    # Distinct base IDs per table: a set for the comparison, and a sorted
    # list for reporting and for later cells.
    set_t1 = set(df_t1[col_t1].dropna().map(normalize_base_id))
    set_t2_t3 = set(df_t2_t3[col_t2_t3].dropna().map(normalize_base_id))
    participants_t1 = sorted(set_t1)
    participants_t2_t3 = sorted(set_t2_t3)

    print(f"Detected participant column (T1): {col_t1}")
    print(f"Detected participant column (T2/T3): {col_t2_t3}")
    print(f"T1 participants (base IDs): {len(participants_t1)}")
    print(f"T2/T3 participants (base IDs): {len(participants_t2_t3)}")

    # IDs present on one side only, in each direction.
    missing_in_t2_t3 = sorted(set_t1.difference(set_t2_t3))
    missing_in_t1 = sorted(set_t2_t3.difference(set_t1))

    if missing_in_t2_t3 or missing_in_t1:
        print("✗ Participant lists do not match (base IDs).")
        if missing_in_t2_t3:
            print("Participants in T1 but not in T2/T3:", missing_in_t2_t3)
        if missing_in_t1:
            print("Participants in T2/T3 but not in T1:", missing_in_t1)
    else:
        print("✓ Participant lists correspond exactly (base IDs).")
else:
    # At least one table has no recognisable participant column; show the
    # headers so the right one can be picked by hand.
    print("Could not auto-detect participant column.")
    print("T1 columns:", df_t1.columns.tolist())
    print("T2/T3 columns:", df_t2_t3.columns.tolist())

Detected participant column (T1): Participant_ID
Detected participant column (T2/T3): Participant_ID
T1 participants (base IDs): 35
T2/T3 participants (base IDs): 35
✓ Participant lists correspond exactly (base IDs).


In [5]:
import os
import shutil
from pathlib import Path

# Region folders that are searched for participant directories
precuneus_root = Path(r"D:\01-Raw_data-spectro\Precuneus")
acc_root = Path(r"D:\01-Raw_data-spectro\ACC")

# Region folders that receive the copies (flat, no subfolders)
precuneus_dest = Path(r"C:\Users\okkam\Desktop\labo\article 2\Longitudinal_Multimodal_Data_CIMAQ\Glu\visual inspection\Precuneus")
acc_dest = Path(r"C:\Users\okkam\Desktop\labo\article 2\Longitudinal_Multimodal_Data_CIMAQ\Glu\visual inspection\ACC")

# Every base ID that appears in either participant list, sorted
base_ids = sorted({*participants_t1, *participants_t2_t3})

def copy_fit_summaries_flat(region_root: Path, dest_root: Path, base_ids: list[str]) -> None:
    """Copy each participant's ``fit_summary.png`` into one flat folder.

    For every base ID, all directories directly under ``region_root`` whose
    name starts with ``"<base_id>_"`` are looked up. In each of them the file
    ``first_run_data/fit_tissue_adjusted/fit_summary.png`` is copied to
    ``dest_root`` as ``<folder name>_fit_summary.png``. A summary of copied
    files, IDs without any folder, and folders without the PNG is printed.

    Args:
        region_root: Folder containing the participant directories.
        dest_root: Folder receiving the copies; created if needed.
        base_ids: Base participant IDs used as folder-name prefixes.

    Returns:
        None. If ``region_root`` does not exist, a message is printed and
        nothing is created or copied.
    """
    # Local import so the block does not rely on a file-level `import glob`.
    from glob import escape

    if not region_root.exists():
        print(f"Source folder not found: {region_root}")
        return

    dest_root.mkdir(parents=True, exist_ok=True)

    copied = 0
    missing_participants = []
    missing_pngs = []
    relative_png = Path("first_run_data") / "fit_tissue_adjusted" / "fit_summary.png"

    for base_id in base_ids:
        # Escape glob metacharacters ([, ?, *) so the ID is matched literally;
        # only the trailing "*" acts as a wildcard
        # (e.g. 3002498 -> 3002498_327986_V17S).
        pattern = f"{escape(str(base_id))}_*"
        matches = sorted(p for p in region_root.glob(pattern) if p.is_dir())
        if not matches:
            missing_participants.append(base_id)
            continue

        for participant_folder in matches:
            src_png = participant_folder / relative_png
            if not src_png.exists():
                missing_pngs.append(str(src_png))
                continue

            # Flat copy: name file as <participant_folder>_fit_summary.png
            dest_png = dest_root / f"{participant_folder.name}_fit_summary.png"
            shutil.copy2(src_png, dest_png)
            copied += 1

    print(f"\nRegion: {region_root.name}")
    print(f"Copied PNGs: {copied}")
    print(f"Missing participant folders: {len(missing_participants)}")
    print(f"Missing PNGs: {len(missing_pngs)}")
    if missing_participants:
        print("Participants without folders:", missing_participants)
    if missing_pngs:
        # Only the first missing path is shown, as an example.
        print("Example missing PNG:", missing_pngs[0])

# Copy the fit summaries for each region: Precuneus first, then ACC
for _src_root, _dest_root in (
    (precuneus_root, precuneus_dest),
    (acc_root, acc_dest),
):
    copy_fit_summaries_flat(_src_root, _dest_root, base_ids)


Region: Precuneus
Copied PNGs: 73
Missing participant folders: 0
Missing PNGs: 0

Region: ACC
Copied PNGs: 73
Missing participant folders: 0
Missing PNGs: 0


In [8]:
from pathlib import Path
import pandas as pd

# Folder that holds the inspection log; created if it does not exist yet
log_dest = Path(r"C:\Users\okkam\Desktop\labo\article 2\Longitudinal_Multimodal_Data_CIMAQ\Glu\visual inspection")
log_dest.mkdir(parents=True, exist_ok=True)

# One row per base ID (union of both participant lists), with empty
# "pass" and "notes" columns
simple_log = pd.DataFrame(
    {
        "base_id": sorted(set(participants_t1).union(participants_t2_t3)),
        "pass": "",
        "notes": "",
    }
)

simple_log_path = log_dest.joinpath("glu_visual_inspection_log_simple.csv")
# NOTE(review): to_csv replaces any existing file at this path, so
# re-running this cell discards whatever was already in the log.
simple_log.to_csv(simple_log_path, index=False)
print(
    f"Created simple inspection log: {simple_log_path}",
    f"Rows: {len(simple_log)}",
    sep="\n",
)

Created simple inspection log: C:\Users\okkam\Desktop\labo\article 2\Longitudinal_Multimodal_Data_CIMAQ\Glu\visual inspection\glu_visual_inspection_log_simple.csv
Rows: 35
