In [1]:
# ============================================================================
# COMPLETE ANALYSIS NOTEBOOK (RESTORED ROBUST LOGIC)
# ============================================================================

import pandas as pd
import numpy as np
import nibabel as nib
from pathlib import Path
from scipy.spatial import procrustes
from scipy.ndimage import label, center_of_mass
import warnings

warnings.filterwarnings('ignore')

# ----------------------------------------------------------------------------
# 1. CONFIGURATION
# ----------------------------------------------------------------------------
# Root of the longitudinal dataset and the subject-info spreadsheet.
BASE_DIR = Path("/user_data/csimmon2") / "long_pt"
CSV_FILE = Path('/user_data/csimmon2/git_repos/long_pt') / 'long_pt_sub_info.csv'

# Two alternative category -> cope-number maps. Key order matters: the
# analysis loop uses it as the row/column order of each RDM.

# Map 1 (author's note): Standard Liu contrasts
# (Face>Obj, House>Obj, Obj>Scram, Word>Scram).
MAP_LIU = dict(
    name='Standard Liu (Face > Object)',
    copes=dict(face=1, house=2, object=3, word=12),
)

# Map 2 (author's note): every category contrasted against Scramble.
MAP_SCRAMBLE = dict(
    name='Robust Scramble (Cat > Scramble)',
    copes=dict(face=10, house=11, object=3, word=12),
)

# Category groupings used to label each ROI as Bilateral or Unilateral.
BILATERAL_CATS = ['object', 'house']
UNILATERAL_CATS = ['face', 'word']

# ----------------------------------------------------------------------------
# 2. SUBJECT LOADING
# ----------------------------------------------------------------------------
def load_subjects():
    """Build the per-subject lookup used by the analysis loop.

    Reads CSV_FILE and keeps each row whose ``sub`` directory exists under
    BASE_DIR and contains at least two ``ses-*`` entries.

    Returns:
        dict mapping ``sub_id`` to ``{'group', 'hemi', 'sessions'}``:
        ``group`` is copied from the row; ``hemi`` is ``'l'`` when the row's
        ``intact_hemi`` equals ``'left'`` and ``'r'`` for any other value;
        ``sessions`` holds the first two session labels (``ses-`` text
        removed) in sorted string order.
    """
    # NOTE(review): sorting is lexicographic on the label strings, so it only
    # matches chronological order if session labels are zero-padded -- confirm.
    table = pd.read_csv(CSV_FILE)
    subjects = {}
    for _, record in table.iterrows():
        sub_id = record['sub']
        sub_dir = BASE_DIR / sub_id
        if not sub_dir.exists():
            continue

        session_labels = sorted(
            entry.name.replace('ses-', '') for entry in sub_dir.glob('ses-*')
        )
        # A subject needs two sessions to contribute a paired comparison.
        if len(session_labels) < 2:
            continue

        subjects[sub_id] = dict(
            group=record['group'],
            hemi='l' if record['intact_hemi'] == 'left' else 'r',
            sessions=session_labels[:2],
        )
    return subjects


ANALYSIS_SUBJECTS = load_subjects()
print(f"✓ Loaded {len(ANALYSIS_SUBJECTS)} subjects with paired data.")

# ----------------------------------------------------------------------------
# 3. ROBUST FUNCTIONS (Restored Clustering Logic)
# ----------------------------------------------------------------------------

def get_native_file(sub_id, session, file_type, cope_id):
    """Return the path of a stats image in a session's HighLevel.gfeat tree.

    The path is
    ``BASE_DIR/<sub_id>/ses-<session>/derivatives/fsl/loc/HighLevel.gfeat/``
    ``cope<cope_id>.feat/stats/<file_type>1.nii.gz``.

    Args:
        sub_id: Subject directory name.
        session: Session label without the ``ses-`` prefix.
        file_type: Stats file stem, e.g. ``'zstat'`` or ``'cope'``.
        cope_id: Cope number selecting the ``cope<N>.feat`` directory.

    Returns:
        The ``Path`` if the file exists on disk, otherwise ``None``.
    """
    candidate = (
        BASE_DIR / sub_id / f'ses-{session}' / 'derivatives' / 'fsl' / 'loc'
        / 'HighLevel.gfeat' / f'cope{cope_id}.feat' / 'stats'
        / f'{file_type}1.nii.gz'
    )
    if candidate.exists():
        return candidate
    return None

def define_roi_cluster_top10(sub_id, session, cope_id, mask_path):
    """Locate the weighted centroid of the largest top-10% z-stat cluster.

    Within the search mask, voxels at or above the 90th percentile of the
    z-statistic are kept, split into connected components, and only the
    largest component is used. Restricting to one cluster keeps distant
    supra-threshold specks from dragging the centroid.

    Args:
        sub_id: Subject directory name.
        session: Session label without the ``ses-`` prefix.
        cope_id: Cope number whose ``zstat`` image defines the ROI.
        mask_path: ``Path`` to the search-mask image; voxels > 0 are searched.

    Returns:
        ``(centroid, affine, shape)`` where ``centroid`` is the z-weighted
        mean voxel coordinate mapped through the z-stat image affine (mm per
        the original author's comment), and ``affine``/``shape`` are those of
        the z-stat image. ``None`` when the z-stat or mask file is missing,
        fewer than 10 usable voxels remain, no cluster is found, or the
        cluster weights sum to zero.
    """
    zstat_path = get_native_file(sub_id, session, 'zstat', cope_id)
    if not zstat_path:
        return None
    if not mask_path.exists():
        return None

    stat_img = nib.load(zstat_path)
    stat_vol = stat_img.get_fdata()
    in_search = nib.load(mask_path).get_fdata() > 0

    # Zero marks "not usable": this drops out-of-mask voxels and also any
    # in-mask voxel whose z-value is exactly 0.
    z_in_mask = stat_vol * in_search
    z_in_mask[z_in_mask == 0] = np.nan

    usable = z_in_mask[~np.isnan(z_in_mask)]
    if usable.size < 10:
        return None

    # Top 10% of usable voxels; NaN voxels compare False and stay excluded.
    cutoff = np.percentile(usable, 90)
    supra = z_in_mask >= cutoff

    components, n_components = label(supra)
    if n_components == 0:
        return None

    # Voxel count per label in one pass; index 0 is background, so skip it.
    component_sizes = np.bincount(components.ravel())[1:]
    biggest_label = np.argmax(component_sizes) + 1
    cluster = components == biggest_label

    voxel_idx = np.where(cluster)
    cluster_coords = np.array(voxel_idx).T
    z_weights = z_in_mask[cluster]

    # NOTE(review): if the 90th-percentile cutoff is negative, some weights
    # are negative and the weighted mean may fall outside the cluster; only
    # an exactly-zero weight sum is rejected here.
    if np.sum(z_weights) == 0:
        return None

    mean_ijk = np.average(cluster_coords, axis=0, weights=z_weights)
    centroid = nib.affines.apply_affine(stat_img.affine, mean_ijk)
    return centroid, stat_img.affine, stat_img.shape

def create_sphere_mask(centroid, affine, shape, radius=6):
    """Build a boolean voxel mask of a sphere around a world-space point.

    Every voxel index of the first three dimensions of ``shape`` is mapped
    through ``affine``; voxels whose mapped position lies within ``radius``
    of ``centroid`` (Euclidean distance, inclusive) are set to True.

    Args:
        centroid: Sphere centre in the coordinate space ``affine`` maps to.
        affine: Voxel-to-world affine passed to ``apply_affine``.
        shape: Shape of the output mask.
        radius: Sphere radius in the affine's output units (presumably mm).

    Returns:
        Boolean ``numpy`` array of shape ``shape``.
    """
    # One row per voxel: its (i, j, k) index.
    voxel_ijk = np.indices(shape[:3]).reshape(3, -1).T
    voxel_world = nib.affines.apply_affine(affine, voxel_ijk)

    within = np.linalg.norm(voxel_world - centroid, axis=1) <= radius
    hits = voxel_ijk[within]

    sphere = np.zeros(shape, dtype=bool)
    sphere[hits[:, 0], hits[:, 1], hits[:, 2]] = True
    return sphere

# ----------------------------------------------------------------------------
# 4. MAIN EXECUTION LOOP (Runs BOTH Maps)
# ----------------------------------------------------------------------------

# For each contrast map: define a session-1 ROI per category, extract the
# betas of every category inside that ROI for both sessions, build one RDM per
# session, and compare the two RDMs. Results are then summarised for the
# 'OTC' group as Bilateral vs Unilateral ROI types.
for map_config in [MAP_LIU, MAP_SCRAMBLE]:

    map_name = map_config['name']
    cope_map = map_config['copes']
    n_cats = len(cope_map)

    # Upper-triangle indices of the category-by-category RDM, sized from the
    # map rather than a hard-coded 4. Extracted manually because the original
    # author reported that squareform crashed here.
    tri_rows, tri_cols = np.triu_indices(n_cats, k=1)

    print("\n" + "=" * 80)
    print(f"RUNNING ANALYSIS FOR: {map_name}")
    print("=" * 80)

    results_data = []

    for sub_id, info in ANALYSIS_SUBJECTS.items():
        s1, s2 = info['sessions']
        hemi = info['hemi']

        # idx is the row/column of roi_cat in the RDM, because the betas
        # below are stacked in the same cope_map order.
        for idx, (roi_cat, roi_cope) in enumerate(cope_map.items()):

            # 1. Define ROI from session 1 (largest top-10% cluster).
            mask_path = (
                BASE_DIR / sub_id / f'ses-{s1}' / 'ROIs'
                / f'{hemi}_{roi_cat}_searchmask.nii.gz'
            )
            res = define_roi_cluster_top10(sub_id, s1, roi_cope, mask_path)
            if res is None:
                continue
            centroid, affine, shape = res
            sphere_mask = create_sphere_mask(centroid, affine, shape)

            # 2. Extract betas for every category in both sessions.
            # NOTE(review): the session-1 sphere is applied to session-2
            # images, which assumes both sessions share one voxel grid.
            betas_s1, betas_s2 = [], []
            for target_cope in cope_map.values():
                p1 = get_native_file(sub_id, s1, 'cope', target_cope)
                p2 = get_native_file(sub_id, s2, 'cope', target_cope)
                if not p1 or not p2:
                    break

                # nan_to_num turns missing (NaN) voxels into 0.
                betas_s1.append(np.nan_to_num(nib.load(p1).get_fdata()[sphere_mask]))
                betas_s2.append(np.nan_to_num(nib.load(p2).get_fdata()[sphere_mask]))

            # A missing cope file in either session invalidates this ROI.
            if len(betas_s1) < n_cats:
                continue

            # 3. Compute RDMs (1 - Pearson correlation between categories).
            b1, b2 = np.array(betas_s1), np.array(betas_s2)

            # Correlations need at least two voxels; fewer would only yield
            # an all-NaN result row.
            if b1.shape[1] < 2:
                continue

            # Variance check (skip dead ROIs).
            if np.var(b1) == 0 or np.var(b2) == 0:
                continue

            with np.errstate(all='ignore'):
                c1, c2 = np.corrcoef(b1), np.corrcoef(b2)

            # NOTE(review): undefined correlations (constant rows) become a
            # dissimilarity of 0 here.
            rdm1 = np.nan_to_num(1 - c1)
            rdm2 = np.nan_to_num(1 - c2)

            # Ensure diagonal is exactly zero.
            np.fill_diagonal(rdm1, 0)
            np.fill_diagonal(rdm2, 0)

            # 4. Metrics

            # A. Distinctiveness: mean correlation between the ROI's own
            # category and every other category. Only the absolute change
            # across sessions is reported, so the sign convention is moot.
            others = [i for i in range(n_cats) if i != idx]
            dist1 = np.mean(c1[idx, others])
            dist2 = np.mean(c2[idx, others])

            # B. Geometry instability: 1 - correlation of the two RDMs'
            # upper triangles; undefined when either triangle is constant.
            v1 = rdm1[tri_rows, tri_cols]
            v2 = rdm2[tri_rows, tri_cols]

            if np.std(v1) > 0 and np.std(v2) > 0:
                geo_inst = 1 - np.corrcoef(v1, v2)[0, 1]
            else:
                geo_inst = np.nan

            # C. Procrustes disparity between the two RDMs. The norm check
            # avoids procrustes' error for near-degenerate (all-zero) input.
            if np.linalg.norm(rdm1) > 1e-5 and np.linalg.norm(rdm2) > 1e-5:
                try:
                    _, _, disparity = procrustes(rdm1, rdm2)
                except ValueError:
                    disparity = np.nan
            else:
                disparity = np.nan

            results_data.append({
                'Group': info['group'],
                'ROI': roi_cat,
                'Type': 'Unilateral' if roi_cat in UNILATERAL_CATS else 'Bilateral',
                'Delta_Distinctiveness': abs(dist2 - dist1),
                'Geometry_Instability': geo_inst,
                'Procrustes_Error': disparity
            })

    # 5. REPORTING PER MAP
    df = pd.DataFrame(results_data)
    if not df.empty:
        otc = df[df['Group'] == 'OTC']

        metrics = ['Delta_Distinctiveness', 'Geometry_Instability', 'Procrustes_Error']

        for m in metrics:
            print(f"\n>>> {m} <<<")
            by_type = otc.groupby('Type')[m].mean()
            print(by_type)

            # A missing ROI type must not be treated as a mean of 0: that
            # would fabricate a difference and a verdict.
            diff = by_type.get('Bilateral', np.nan) - by_type.get('Unilateral', np.nan)

            if np.isnan(diff):
                print("Difference (Bi - Uni): n/a")
                print("? Inconclusive (Bilateral or Unilateral mean unavailable)")
            else:
                print(f"Difference (Bi - Uni): {diff:.4f}")
                # The hypothesis counts as supported when Bilateral exceeds
                # Unilateral on the metric.
                if diff > 0:
                    print("✓ Hypothesis Supported")
                else:
                    print("X Hypothesis Failed")
            print("-" * 30)
    else:
        print("Error: No results generated.")

AttributeError: module 'pandas' has no attribute 'read_csv'