# Longitudinal RSA Analysis: VOTC Category Selectivity

**Contrasts**: COPE_MAP_SCRAMBLE (Category > Scrambled baseline)
**ROI Threshold**: Top 10% of positive-z voxels within each search mask (percentile=90, with a floor of z > 1.64) for better SNR

**Measures**:
- Geometry Preservation: RDM correlation T1↔T2 (higher = more stable)
- Distinctiveness: Mean Fisher z-transformed correlation between the preferred-category pattern and each non-preferred pattern (lower = more selective)

In [1]:
# CELL 1: Setup
import pandas as pd
import numpy as np
import nibabel as nib
from pathlib import Path
from scipy.ndimage import label, center_of_mass
from scipy.stats import pearsonr, ttest_ind
import warnings
warnings.filterwarnings('ignore')

# Filesystem locations
BASE_DIR = Path('/user_data/csimmon2/long_pt')
CSV_FILE = Path('/user_data/csimmon2/git_repos/long_pt/long_pt_sub_info.csv')

# Subjects whose first usable session is not session 1; earlier sessions are
# dropped when subjects are loaded (subjects not listed start at 1).
SESSION_START = dict.fromkeys(('sub-010', 'sub-018', 'sub-068'), 2)

# SCRAMBLE contrasts - every entry is Category > Scrambled (consistent rationale).
# Each value is (cope number, multiplier applied to the loaded z-map).
COPE_MAP = dict(
    face=(10, 1),
    word=(12, 1),
    object=(3, 1),
    house=(11, 1),
)

# Category order follows COPE_MAP's insertion order: face, word, object, house.
CATEGORIES = list(COPE_MAP)

# ROI definition / pattern extraction parameters
ROI_PERCENTILE = 90  # Top 10% for better SNR
MIN_Z = 1.64         # lower bound on the ROI z threshold
MIN_CLUSTER = 20     # minimum voxel count for a usable cluster
SPHERE_RADIUS = 6    # sphere radius in world-coordinate units (presumably mm - confirm)

print("✓ Config loaded (SCRAMBLE, top-10%)")

✓ Config loaded (SCRAMBLE, top-10%)


In [2]:
# CELL 2: Load Subjects
def load_subjects():
    """Build the subject lookup from the subject CSV and the on-disk session folders.

    A CSV row is skipped when the subject has no directory under ``BASE_DIR``
    or when no ``ses-*`` directory remains after applying ``SESSION_START``.

    Returns
    -------
    dict
        ``{subject_id: {'sessions', 'hemi', 'group', 'is_patient'}}`` where
        ``sessions`` is the list of session labels (strings, numerically
        ordered), ``hemi`` is ``'l'`` or ``'r'``, and ``group`` is the CSV
        ``group`` value for patients or ``'control'`` otherwise.
    """
    subject_table = pd.read_csv(CSV_FILE)
    subjects = {}

    for _, record in subject_table.iterrows():
        sid = record['sub']
        subject_root = BASE_DIR / sid
        if not subject_root.exists():
            continue

        # Session labels are the directory names with the 'ses-' prefix removed,
        # ordered numerically rather than alphabetically.
        session_labels = [
            entry.name.replace('ses-', '')
            for entry in subject_root.glob('ses-*')
            if entry.is_dir()
        ]
        session_labels.sort(key=int)

        first_allowed = SESSION_START.get(sid, 1)
        session_labels = [lab for lab in session_labels if int(lab) >= first_allowed]
        if len(session_labels) == 0:
            continue

        # Controls default to the left hemisphere and the 'control' group;
        # patients take both from their CSV row.
        hemi, group = 'l', 'control'
        is_patient = record.get('patient', 0) == 1
        if is_patient:
            if record.get('intact_hemi', 'left') != 'left':
                hemi = 'r'
            group = record.get('group', 'unknown')

        subjects[sid] = dict(
            sessions=session_labels,
            hemi=hemi,
            group=group,
            is_patient=is_patient,
        )

    return subjects

SUBJECTS = load_subjects()
print(f"✓ Loaded {len(SUBJECTS)} subjects")
# Report how many loaded subjects fall into each of the three expected groups.
for g in ('OTC', 'nonOTC', 'control'):
    members = [sid for sid, meta in SUBJECTS.items() if meta['group'] == g]
    print(f"  {g}: {len(members)}")

✓ Loaded 25 subjects
  OTC: 7
  nonOTC: 9
  control: 9


In [3]:
# CELL 3: Core Functions

def create_sphere(center_mni, affine, shape, radius=6):
    """Return a boolean volume marking voxels within ``radius`` of a world-space point.

    Every voxel index of a grid with the given ``shape`` is mapped through
    ``affine`` to world coordinates; a voxel is included when its Euclidean
    distance to ``center_mni`` is at most ``radius``.
    """
    # (3, X, Y, Z) index grid -> (X*Y*Z, 3) list of voxel indices.
    index_grid = np.indices((shape[0], shape[1], shape[2]))
    voxel_ijk = np.moveaxis(index_grid, 0, -1).reshape(-1, 3)

    world_xyz = nib.affines.apply_affine(affine, voxel_ijk)
    offsets = world_xyz - center_mni
    inside = np.linalg.norm(offsets, axis=1) <= radius
    return inside.reshape(shape)


def extract_rois(cope_map, percentile=90, min_cluster=20, min_z=1.64):
    """Locate one suprathreshold cluster per hemisphere, category and session.

    For every subject in ``SUBJECTS`` and every
    ``{hemi}_{cat}_searchmask.nii.gz`` found in the subject's first-session
    ``ROIs`` directory, each session's z-map is thresholded inside the search
    mask and the largest connected suprathreshold cluster is summarised.

    Parameters
    ----------
    cope_map : dict
        Maps category name -> ``(cope_num, mult)``. ``cope_num`` selects the
        ``cope{N}.feat`` directory; ``mult`` scales the loaded z-map.
    percentile : float
        Percentile of the positive in-mask z-values used as the threshold
        (90 keeps the top 10%).
    min_cluster : int
        Minimum number of positive in-mask voxels needed to attempt a session,
        and minimum size of the selected cluster.
    min_z : float
        Lower bound on the threshold.

    Returns
    -------
    dict
        ``{subject_id: {"{hemi}_{cat}": {session: roi_info}}}``. ``roi_info``
        holds ``centroid`` (mask affine applied to the cluster's centre of
        mass), ``n_voxels``, ``peak_z``, ``threshold``, ``affine`` and
        ``shape``. An ROI key whose search mask loaded is present even when no
        session produced a cluster (its value is then an empty dict).

    Notes
    -----
    Files that fail to load or process are skipped with a printed message.
    Only ``Exception`` is caught, so a kernel interrupt is not swallowed.
    """
    all_rois = {}

    for sid, info in SUBJECTS.items():
        first_ses = info['sessions'][0]
        roi_dir = BASE_DIR / sid / f'ses-{first_ses}' / 'ROIs'
        if not roi_dir.exists():
            continue

        subject_rois = {}

        for hemi in ['l', 'r']:
            for cat, (cope_num, mult) in cope_map.items():
                mask_file = roi_dir / f'{hemi}_{cat}_searchmask.nii.gz'
                if not mask_file.exists():
                    continue

                try:
                    mask_img = nib.load(mask_file)
                    search_mask = mask_img.get_fdata() > 0
                    affine = mask_img.affine
                    shape = mask_img.shape
                except Exception as exc:
                    print(f"  ⚠ {sid} {hemi}_{cat}: could not read search mask ({exc})")
                    continue

                roi_key = f'{hemi}_{cat}'
                subject_rois[roi_key] = {}

                for ses in info['sessions']:
                    feat_dir = BASE_DIR / sid / f'ses-{ses}' / 'derivatives' / 'fsl' / 'loc' / 'HighLevel.gfeat'
                    # Later sessions use the file suffixed with the first session's
                    # label (presumably the map aligned to that session - confirm).
                    z_name = 'zstat1.nii.gz' if ses == first_ses else f'zstat1_ses{first_ses}.nii.gz'
                    z_file = feat_dir / f'cope{cope_num}.feat' / 'stats' / z_name

                    if not z_file.exists():
                        continue

                    try:
                        z_data = nib.load(z_file).get_fdata() * mult
                        pos_mask = search_mask & (z_data > 0)
                        pos_voxels = z_data[pos_mask]

                        if len(pos_voxels) < min_cluster:
                            continue

                        # Percentile over positive in-mask voxels, never below min_z.
                        thresh = max(np.percentile(pos_voxels, percentile), min_z)
                        suprathresh = (z_data > thresh) & search_mask
                        labeled, n_clusters = label(suprathresh)

                        if n_clusters == 0:
                            continue

                        # One pass over the label volume; index 0 is background.
                        # argmax keeps the lowest label on ties.
                        cluster_sizes = np.bincount(labeled.ravel())[1:]
                        best_idx = int(np.argmax(cluster_sizes)) + 1
                        best_size = int(cluster_sizes[best_idx - 1])

                        if best_size < min_cluster:
                            continue

                        cluster_mask = (labeled == best_idx)
                        centroid = nib.affines.apply_affine(affine, center_of_mass(cluster_mask))
                        # Restrict the peak search to the cluster so NaNs or values
                        # outside it can never be selected.
                        peak_idx = np.unravel_index(
                            np.argmax(np.where(cluster_mask, z_data, -np.inf)),
                            z_data.shape,
                        )

                        subject_rois[roi_key][ses] = {
                            'centroid': centroid,
                            'n_voxels': best_size,
                            'peak_z': float(z_data[peak_idx]),
                            'threshold': float(thresh),
                            'affine': affine,
                            'shape': shape
                        }
                    except Exception as exc:
                        print(f"  ⚠ {sid} {roi_key} ses-{ses}: skipped ({exc})")
                        continue

        if subject_rois:
            all_rois[sid] = subject_rois

    return all_rois


def compute_rdm(patterns):
    """Return the square dissimilarity matrix ``1 - Pearson r`` between patterns.

    The diagonal is left at zero; every off-diagonal cell is computed
    independently from its own pair of patterns.
    """
    n_patterns = len(patterns)
    rdm = np.zeros((n_patterns, n_patterns))
    for row, col in np.ndindex(n_patterns, n_patterns):
        if row == col:
            continue
        corr, _pval = pearsonr(patterns[row], patterns[col])
        rdm[row, col] = 1 - corr
    return rdm

# Completion marker: confirms the function definitions above executed.
print("✓ Functions defined")

✓ Functions defined


In [4]:
# CELL 4: Extract ROIs
print(f"Extracting ROIs (top-{100-ROI_PERCENTILE}%, SCRAMBLE contrasts)...")

rois = extract_rois(
    COPE_MAP,
    percentile=ROI_PERCENTILE,
    min_cluster=MIN_CLUSTER,
    min_z=MIN_Z,
)

print(f"✓ {len(rois)} subjects")

# Verify thresholds: for the first three subjects, show the first session of
# the first ROI (nothing is printed when that ROI has no sessions).
print("\nSample thresholds:")
for sid in list(rois)[:3]:
    first_roi = next(iter(rois[sid].items()), None)
    if first_roi is None:
        continue
    roi_key, sessions = first_roi
    data = next(iter(sessions.values()), None)
    if data is not None:
        print(f"  {sid} {roi_key}: z>{data['threshold']:.2f}, n={data['n_voxels']}")

Extracting ROIs (top-10%, SCRAMBLE contrasts)...
✓ 24 subjects

Sample thresholds:
  sub-004 l_face: z>2.16, n=448
  sub-007 r_face: z>4.99, n=1228
  sub-008 l_face: z>9.68, n=1296


In [5]:
# CELL 5: Geometry Preservation

def compute_geometry_preservation(rois_dict, cope_map, radius=6):
    """Correlate each ROI's category RDM between its earliest and latest session.

    For every ROI with at least two sessions, a sphere of ``radius`` is placed
    on the ROI centroid of the earliest and of the latest session. Within each
    sphere the z-map pattern of every category in ``CATEGORIES`` is extracted,
    an RDM is built with ``compute_rdm``, and the upper triangles of the two
    RDMs are Pearson-correlated.

    Parameters
    ----------
    rois_dict : dict
        ``{subject_id: {"{hemi}_{cat}": {session: {'centroid': ...}}}}`` as
        produced by ``extract_rois``.
    cope_map : dict
        Maps category name -> ``(cope_num, mult)``.
    radius : float
        Sphere radius passed to ``create_sphere``.

    Returns
    -------
    pandas.DataFrame
        One row per ROI with columns ``subject``, ``group``, ``roi``, ``hemi``,
        ``category``, ``category_type``, ``geometry_preservation``, ``p_value``.
        Empty when nothing could be computed.

    Notes
    -----
    Session labels are strings, so they are ordered numerically; a plain
    string sort would put ``'10'`` before ``'2'``.
    """
    results = []

    for sid, roi_data in rois_dict.items():
        info = SUBJECTS.get(sid)
        if not info:
            continue

        first_ses = info['sessions'][0]
        roi_dir = BASE_DIR / sid / f'ses-{first_ses}' / 'ROIs'

        # The first search mask that exists supplies the voxel grid (affine + shape).
        candidates = (
            roi_dir / f'{h}_{cat}_searchmask.nii.gz'
            for cat in CATEGORIES
            for h in ['l', 'r']
        )
        ref_file = next((c for c in candidates if c.exists()), None)
        if ref_file is None:
            continue

        ref_img = nib.load(ref_file)
        affine, shape = ref_img.affine, ref_img.shape

        n_categories = len(CATEGORIES)
        triu_idx = np.triu_indices(n_categories, k=1)

        for roi_key, sessions_data in roi_data.items():
            # Numeric ordering, consistent with how sessions are ordered on load.
            sessions = sorted(sessions_data.keys(), key=int)
            if len(sessions) < 2:
                continue

            first_s, last_s = sessions[0], sessions[-1]
            sphere_t1 = create_sphere(sessions_data[first_s]['centroid'], affine, shape, radius)
            sphere_t2 = create_sphere(sessions_data[last_s]['centroid'], affine, shape, radius)

            rdms = {}
            for ses, sphere in [(first_s, sphere_t1), (last_s, sphere_t2)]:
                feat_dir = BASE_DIR / sid / f'ses-{ses}' / 'derivatives' / 'fsl' / 'loc' / 'HighLevel.gfeat'

                patterns = []
                valid = True
                for cat in CATEGORIES:
                    cope_num, mult = cope_map[cat]
                    z_name = 'zstat1.nii.gz' if ses == first_ses else f'zstat1_ses{first_ses}.nii.gz'
                    cope_file = feat_dir / f'cope{cope_num}.feat' / 'stats' / z_name

                    if not cope_file.exists():
                        valid = False
                        break

                    data = nib.load(cope_file).get_fdata() * mult
                    pattern = data[sphere]

                    # An empty sphere or any non-finite voxel invalidates the session.
                    if len(pattern) == 0 or not np.all(np.isfinite(pattern)):
                        valid = False
                        break
                    patterns.append(pattern)

                if valid and len(patterns) == n_categories:
                    try:
                        rdms[ses] = compute_rdm(patterns)
                    except Exception as exc:
                        print(f"  ⚠ {sid} {roi_key} ses-{ses}: RDM failed ({exc})")

            if len(rdms) == 2:
                r, p = pearsonr(rdms[first_s][triu_idx], rdms[last_s][triu_idx])

                hemi, cat = roi_key.split('_')
                results.append({
                    'subject': sid,
                    'group': info['group'],
                    'roi': roi_key,
                    'hemi': hemi,
                    'category': cat,
                    'category_type': 'Bilateral' if cat in ['object', 'house'] else 'Unilateral',
                    'geometry_preservation': r,
                    'p_value': p
                })

    return pd.DataFrame(results)

# Run the T1-vs-T2 RDM comparison for every extracted ROI.
print("Computing Geometry Preservation...")
geometry_df = compute_geometry_preservation(
    rois_dict=rois,
    cope_map=COPE_MAP,
    radius=SPHERE_RADIUS,
)
print(f"✓ {len(geometry_df)} measurements")

Computing Geometry Preservation...
✓ 132 measurements


In [6]:
# CELL 6: Geometry Results

if len(geometry_df) == 0:
    print("No results.")
else:
    banner = "="*60
    print(banner)
    print("GEOMETRY PRESERVATION (SCRAMBLE, top-10%)")
    print(banner)

    # Mean / SD / count of the preservation score per group and category type.
    print("\nBy Group & Category Type:")
    by_group_type = geometry_df.groupby(['group', 'category_type'])['geometry_preservation']
    summary = by_group_type.agg(['mean', 'std', 'count'])
    print(summary.round(3))

    # OTC test: independent-samples t-test between category types.
    # NOTE(review): rows are per ROI, so one subject contributes several
    # values to each sample - confirm independence is acceptable here.
    print("\nOTC Bilateral vs Unilateral:")
    otc = geometry_df[geometry_df['group'] == 'OTC']
    if len(otc) > 0:
        is_bilateral = otc['category_type'] == 'Bilateral'
        is_unilateral = otc['category_type'] == 'Unilateral'
        bil = otc[is_bilateral]['geometry_preservation']
        uni = otc[is_unilateral]['geometry_preservation']
        if len(bil) > 1 and len(uni) > 1:
            t, p = ttest_ind(bil, uni)
            print(f"  Bilateral: {bil.mean():.3f} ± {bil.std():.3f} (n={len(bil)})")
            print(f"  Unilateral: {uni.mean():.3f} ± {uni.std():.3f} (n={len(uni)})")
            print(f"  t={t:.3f}, p={p:.4f}")

GEOMETRY PRESERVATION (SCRAMBLE, top-10%)

By Group & Category Type:
                        mean    std  count
group   category_type                     
OTC     Bilateral      0.226  0.471     12
        Unilateral     0.039  0.554     12
control Bilateral      0.547  0.499     36
        Unilateral     0.551  0.449     36
nonOTC  Bilateral      0.752  0.318     18
        Unilateral     0.551  0.420     18

OTC Bilateral vs Unilateral:
  Bilateral: 0.226 ± 0.471 (n=12)
  Unilateral: 0.039 ± 0.554 (n=12)
  t=0.895, p=0.3807


In [7]:
# CELL 7: Distinctiveness

def compute_distinctiveness(rois_dict, cope_map, radius=6):
    """Score how similar each ROI's preferred-category pattern is to the others.

    For every ROI and session, a sphere of ``radius`` is placed on the ROI
    centroid and the z-map pattern of every category in ``CATEGORIES`` is
    extracted. The patterns are correlated, the correlations are clipped to
    [-0.999, 0.999] and Fisher z-transformed, and the score is the mean
    transformed correlation between the preferred category and each other
    category (lower = more distinct).

    Parameters
    ----------
    rois_dict : dict
        ``{subject_id: {"{hemi}_{cat}": {session: {'centroid': ...}}}}`` as
        produced by ``extract_rois``. Only keys of the form ``l_<cat>`` or
        ``r_<cat>`` with ``<cat>`` in ``CATEGORIES`` are processed.
    cope_map : dict
        Maps category name -> ``(cope_num, mult)``.
    radius : float
        Sphere radius passed to ``create_sphere``.

    Returns
    -------
    pandas.DataFrame
        One row per ROI and session with columns ``subject``, ``group``,
        ``roi``, ``category``, ``session``, ``distinctiveness``. Empty when
        nothing could be computed.
    """
    results = []

    for sid, roi_data in rois_dict.items():
        info = SUBJECTS.get(sid)
        if not info:
            continue

        first_ses = info['sessions'][0]
        roi_dir = BASE_DIR / sid / f'ses-{first_ses}' / 'ROIs'

        # The first search mask that exists supplies the voxel grid (affine + shape).
        candidates = (
            roi_dir / f'{h}_{cat}_searchmask.nii.gz'
            for cat in CATEGORIES
            for h in ['l', 'r']
        )
        ref_file = next((c for c in candidates if c.exists()), None)
        if ref_file is None:
            continue

        ref_img = nib.load(ref_file)
        affine, shape = ref_img.affine, ref_img.shape

        n_categories = len(CATEGORIES)

        for roi_key, sessions_data in roi_data.items():
            # The preferred category is the part of the key after the hemisphere prefix.
            hemi, _, pref_cat = roi_key.partition('_')
            if hemi not in ('l', 'r') or pref_cat not in CATEGORIES:
                continue

            pref_idx = CATEGORIES.index(pref_cat)
            nonpref_idx = [i for i, c in enumerate(CATEGORIES) if c != pref_cat]

            for ses, ses_data in sessions_data.items():
                sphere = create_sphere(ses_data['centroid'], affine, shape, radius)
                feat_dir = BASE_DIR / sid / f'ses-{ses}' / 'derivatives' / 'fsl' / 'loc' / 'HighLevel.gfeat'

                patterns = []
                valid = True
                for cat in CATEGORIES:
                    cope_num, mult = cope_map[cat]
                    z_name = 'zstat1.nii.gz' if ses == first_ses else f'zstat1_ses{first_ses}.nii.gz'
                    cope_file = feat_dir / f'cope{cope_num}.feat' / 'stats' / z_name

                    if not cope_file.exists():
                        valid = False
                        break

                    data = nib.load(cope_file).get_fdata() * mult
                    pattern = data[sphere]
                    # An empty sphere or any non-finite voxel invalidates the session.
                    if len(pattern) == 0 or not np.all(np.isfinite(pattern)):
                        valid = False
                        break
                    patterns.append(pattern)

                if not valid or len(patterns) != n_categories:
                    continue

                try:
                    corr = np.corrcoef(patterns)
                    # Clip before arctanh so |r| = 1 does not produce infinities.
                    corr_fisher = np.arctanh(np.clip(corr, -0.999, 0.999))
                    distinctiveness = np.mean([corr_fisher[pref_idx, i] for i in nonpref_idx])
                except Exception as exc:
                    print(f"  ⚠ {sid} {roi_key} ses-{ses}: distinctiveness failed ({exc})")
                    continue

                results.append({
                    'subject': sid,
                    'group': info['group'],
                    'roi': roi_key,
                    'category': pref_cat,
                    'session': ses,
                    'distinctiveness': distinctiveness
                })

    return pd.DataFrame(results)

# Score every extracted ROI at every session it was found in.
print("Computing Distinctiveness...")
distinctiveness_df = compute_distinctiveness(
    rois_dict=rois,
    cope_map=COPE_MAP,
    radius=SPHERE_RADIUS,
)
print(f"✓ {len(distinctiveness_df)} measurements")

Computing Distinctiveness...
✓ 296 measurements


In [8]:
# CELL 8: Distinctiveness Results

if len(distinctiveness_df) == 0:
    print("No results.")
else:
    banner = "="*60
    print(banner)
    print("DISTINCTIVENESS (lower = more selective)")
    print(banner)

    # Average within subject/category first (across ROIs and sessions), then
    # summarise those per-subject means by group.
    per_subject = distinctiveness_df.groupby(['subject', 'group', 'category'])['distinctiveness'].mean()
    avg = per_subject.reset_index()
    by_group_cat = avg.groupby(['group', 'category'])['distinctiveness']
    summary = by_group_cat.agg(['mean', 'std', 'count'])
    print(summary.round(3))

DISTINCTIVENESS (lower = more selective)
                   mean    std  count
group   category                     
OTC     face      0.549  0.144      6
        house     0.238  0.376      6
        object    0.385  0.340      6
        word      0.439  0.201      6
control face      0.669  0.205      9
        house     0.168  0.213      9
        object    0.503  0.177      9
        word      0.520  0.140      9
nonOTC  face      0.447  0.182      9
        house     0.117  0.269      9
        object    0.405  0.414      9
        word      0.370  0.195      9


In [9]:
# CELL 9: Diagnostic - Check RDM values

print("DIAGNOSTIC: RDM structure check\n")

# Inspect the first ROI of the first subject at that subject's first session.
test_sid = list(rois.keys())[0]
test_info = SUBJECTS[test_sid]
first_ses = test_info['sessions'][0]
roi_key = list(rois[test_sid].keys())[0]
roi_entry = rois[test_sid][roi_key][first_ses]
centroid = roi_entry['centroid']

# Reuse the grid that extract_rois stored with this ROI rather than re-loading
# a hard-coded face search mask, which need not exist for this hemisphere.
affine, shape = roi_entry['affine'], roi_entry['shape']

sphere = create_sphere(centroid, affine, shape, radius=SPHERE_RADIUS)
feat_dir = BASE_DIR / test_sid / f'ses-{first_ses}' / 'derivatives' / 'fsl' / 'loc' / 'HighLevel.gfeat'

print(f"Subject: {test_sid}, ROI: {roi_key}, Voxels: {sphere.sum()}\n")

# First-session maps are always named zstat1.nii.gz.
patterns = []
for cat in CATEGORIES:
    cope_num, mult = COPE_MAP[cat]
    cope_file = feat_dir / f'cope{cope_num}.feat' / 'stats' / 'zstat1.nii.gz'
    data = nib.load(cope_file).get_fdata() * mult
    pattern = data[sphere]
    patterns.append(pattern)
    print(f"{cat}: cope{cope_num}, mean={pattern.mean():.2f}, std={pattern.std():.2f}")

rdm = compute_rdm(patterns)
triu = rdm[np.triu_indices(len(CATEGORIES), k=1)]
print(f"\nRDM upper tri: {np.round(triu, 3)}")
print(f"Range: [{triu.min():.3f}, {triu.max():.3f}]")
# 1 - r lies in [0, 2] for any valid correlation; NaNs fail both comparisons.
if triu.max() <= 2.0 and triu.min() >= 0:
    print("✓ Valid RDM (all values in [0, 2])")
else:
    print("⚠ Unexpected RDM values")

DIAGNOSTIC: RDM structure check

Subject: sub-004, ROI: l_face, Voxels: 914

face: cope10, mean=1.82, std=0.77
word: cope12, mean=-0.24, std=1.05
object: cope3, mean=1.42, std=0.87
house: cope11, mean=0.66, std=0.74

RDM upper tri: [1.096 1.158 0.863 0.729 0.761 0.795]
Range: [0.729, 1.158]
✓ Valid RDM (all values in [0, 2])


In [10]:
# CELL 10: Save Results

output_dir = Path('/user_data/csimmon2/git_repos/long_pt/B_analyses')
output_dir.mkdir(exist_ok=True, parents=True)


def _save_if_nonempty(frame, filename):
    """Write ``frame`` to ``output_dir / filename`` as CSV unless it has no rows."""
    if len(frame) > 0:
        f = output_dir / filename
        frame.to_csv(f, index=False)
        print(f"✓ Saved: {f}")


_save_if_nonempty(geometry_df, 'geometry_preservation_scramble_top10.csv')
_save_if_nonempty(distinctiveness_df, 'distinctiveness_scramble_top10.csv')

print("\nDone.")

✓ Saved: /user_data/csimmon2/git_repos/long_pt/B_analyses/geometry_preservation_scramble_top10.csv
✓ Saved: /user_data/csimmon2/git_repos/long_pt/B_analyses/distinctiveness_scramble_top10.csv

Done.
