In [16]:
# CELL 1: Load CSV and Basic Setup
import pandas as pd
from pathlib import Path
import nibabel as nib
import numpy as np
from nilearn import plotting, image
import matplotlib.pyplot as plt

# Load subject info from CSV
CSV_FILE = Path('/user_data/csimmon2/git_repos/long_pt/long_pt_sub_info.csv')
df = pd.read_csv(CSV_FILE)

BASE_DIR = Path("/user_data/csimmon2/long_pt")
OUTPUT_DIR = BASE_DIR / "analyses" / "rsa_corrected"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Session start mapping (for special cases)
SESSION_START = {'sub-010': 2, 'sub-018': 2, 'sub-068': 2}

# Cope mapping: Key -> (Cope Number, Multiplier)
# Multiplier 1  = Use contrast as is (e.g., Face > Scramble)
# Multiplier -1 = Invert contrast (e.g., Face > Word becomes Word > Face)

COPE_MAP = {
    'face':   (10, 1),   # Face > Scramble
    'word':   (13, -1),  # Face > Word (INVERTED to create Word > Face)
    'object': (3,  1),   # Object > Scramble
    'house':  (11, 1)    # House > Scramble
}

# Parcels are touchy. 
CATEGORY_PARCELS = {
    'face': ['fusiform'],
    'word': ['fusiform', 'inferiortemporal'], # needed to add IT to capture VWFA
    'object': ['lateraloccipital'],
    'house': ['parahippocampal', 'lingual', 'isthmuscingulate']
}

print("✓ CSV loaded and configuration set")

✓ CSV loaded and configuration set


In [17]:
# CELL 2: Streamlined Subject Loading and Configuration
def load_subjects_by_group(group_filter=None, patient_only=True):
    """Streamlined subject loading with proper configuration"""
    
    filtered_df = df.copy()
    
    if patient_only is True:
        filtered_df = filtered_df[filtered_df['patient'] == 1]
    elif patient_only is False:
        filtered_df = filtered_df[filtered_df['patient'] == 0]
    
    if group_filter:
        if isinstance(group_filter, str):
            group_filter = [group_filter]
        filtered_df = filtered_df[filtered_df['group'].isin(group_filter)]
    
    subjects = {}
    
    for _, row in filtered_df.iterrows():
        subject_id = row['sub']
        
        subj_dir = BASE_DIR / subject_id
        if not subj_dir.exists():
            continue
            
        # Get available sessions
        sessions = []
        for ses_dir in subj_dir.glob('ses-*'):
            if ses_dir.is_dir():
                sessions.append(ses_dir.name.replace('ses-', ''))
        
        if not sessions:
            continue
            
        sessions = sorted(sessions, key=lambda x: int(x))
        start_session = SESSION_START.get(subject_id, 1)
        available_sessions = [s for s in sessions if int(s) >= start_session]
        
        if not available_sessions:
            continue
        
        # Proper hemisphere mapping
        if row['patient'] == 1:  # Patients
            hemisphere_full = row.get('intact_hemi', 'left')
            hemisphere = 'l' if hemisphere_full == 'left' else 'r'
        else:  # Controls 
            hemisphere = 'r'  # Default for controls (we'll add bilateral later)
        
        subjects[subject_id] = {
            'code': f"{row['group']}{subject_id.split('-')[1]}",
            'sessions': available_sessions,
            'hemi': hemisphere,
            'group': row['group'],
            'patient_status': 'patient' if row['patient'] == 1 else 'control',
            'age_1': row['age_1'] if pd.notna(row['age_1']) else None,
            'surgery_side': row.get('SurgerySide', None) if row['patient'] == 1 else None
        }
    
    return subjects

# Load subjects systematically
ALL_PATIENTS = load_subjects_by_group(group_filter=None, patient_only=True)
OTC_PATIENTS = load_subjects_by_group(group_filter='OTC', patient_only=True)
NON_OTC_PATIENTS = load_subjects_by_group(group_filter='nonOTC', patient_only=True)
ALL_CONTROLS = load_subjects_by_group(group_filter=None, patient_only=False)

# Start with original subjects only
ANALYSIS_SUBJECTS = {**ALL_PATIENTS, **ALL_CONTROLS}

print("STREAMLINED SUBJECT LOADING COMPLETE")
print("="*50)
print(f"Patients loaded: {len(ALL_PATIENTS)}")
print(f"  - OTC: {len(OTC_PATIENTS)}")
print(f"  - nonOTC: {len(NON_OTC_PATIENTS)}")
print(f"Controls loaded: {len(ALL_CONTROLS)}")
print(f"Total analysis subjects: {len(ANALYSIS_SUBJECTS)}")

print(f"\nCORRECT CATEGORY_PARCELS:")
for category, parcels in CATEGORY_PARCELS.items():
    print(f"  {category:6s}: {parcels}")

print(f"\nSample subjects:")
for subj_id, info in list(ANALYSIS_SUBJECTS.items())[:3]:
    print(f"  {info['code']}: {info['group']} {info['patient_status']}, hemi='{info['hemi']}'")

STREAMLINED SUBJECT LOADING COMPLETE
Patients loaded: 15
  - OTC: 6
  - nonOTC: 9
Controls loaded: 9
Total analysis subjects: 24

CORRECT CATEGORY_PARCELS:
  face  : ['fusiform']
  word  : ['fusiform', 'inferiortemporal']
  object: ['lateraloccipital']
  house : ['parahippocampal', 'lingual', 'isthmuscingulate']

Sample subjects:
  OTC004: OTC patient, hemi='l'
  nonOTC007: nonOTC patient, hemi='r'
  OTC008: OTC patient, hemi='l'


In [18]:
# CELL 3: Functional ROI Extraction (FIXED IMPORTS)
from scipy.ndimage import label, center_of_mass 
import numpy as np
import nibabel as nib

def extract_functional_rois_bilateral(subject_id, threshold_z=2.3, min_cluster_size=30):
    info = ANALYSIS_SUBJECTS[subject_id]
    roi_dir = BASE_DIR / subject_id / f'ses-{info["sessions"][0]}' / 'ROIs'
    if not roi_dir.exists(): return {}
    
    all_results = {}
    first_session = info['sessions'][0]

    for hemi in ['l', 'r']:
        for category, cope_params in COPE_MAP.items():
            # Unpack Tuple
            cope_num, multiplier = cope_params if isinstance(cope_params, tuple) else (cope_params, 1)
            
            # Load Search Mask
            mask_file = roi_dir / f'{hemi}_{category}_searchmask.nii.gz'
            if not mask_file.exists(): continue
            
            try:
                search_mask = nib.load(mask_file).get_fdata() > 0
                affine = nib.load(mask_file).affine
            except: continue
            
            hemi_key = f'{hemi}_{category}'
            all_results[hemi_key] = {}
            
            for session in info['sessions']:
                feat_dir = BASE_DIR / subject_id / f'ses-{session}' / 'derivatives' / 'fsl' / 'loc' / 'HighLevel.gfeat'
                z_name = 'zstat1.nii.gz' if session == first_session else f'zstat1_ses{first_session}.nii.gz'
                cope_file = feat_dir / f'cope{cope_num}.feat' / 'stats' / z_name
                
                if not cope_file.exists(): continue
                
                try:
                    # Load & Invert
                    zstat = nib.load(cope_file).get_fdata() * multiplier
                    
                    # Threshold
                    suprathresh = (zstat > threshold_z) & search_mask
                    
                    # Cluster & Filter
                    labeled, n_clusters = label(suprathresh) # <--- This is where it failed
                    if n_clusters == 0: continue
                    
                    best_idx = -1
                    max_peak = -999
                    
                    for i in range(1, n_clusters + 1):
                        cluster_mask = (labeled == i)
                        if np.sum(cluster_mask) >= min_cluster_size:
                            peak_val = np.max(zstat[cluster_mask])
                            if peak_val > max_peak:
                                max_peak = peak_val
                                best_idx = i
                    
                    if best_idx == -1: continue 
                    
                    # Save Result
                    roi_mask = (labeled == best_idx)
                    peak_idx = np.unravel_index(np.argmax(zstat * roi_mask), zstat.shape)
                    
                    all_results[hemi_key][session] = {
                        'n_voxels': int(np.sum(roi_mask)),
                        'peak_z': zstat[peak_idx],
                        'centroid': nib.affines.apply_affine(affine, center_of_mass(roi_mask)),
                        'roi_mask': roi_mask
                    }
                except Exception as e: print(f"Err {subject_id} {category}: {e}")

    return all_results

# EXECUTE
print("Re-running extraction...")
golarai_functional_final = {}
for sub in ANALYSIS_SUBJECTS:
    res = extract_functional_rois_bilateral(sub, min_cluster_size=30)
    if res: golarai_functional_final[sub] = res
print(f"✓ Extraction Complete: {len(golarai_functional_final)} subjects.")

Re-running extraction...
✓ Extraction Complete: 23 subjects.


In [5]:
# CELL 4: Verify Selectivity (Tuple Corrected)
def verify_roi_selectivity(functional_results, subjects, sample_ids=['OTC004', 'control025']):
    print("\nROI SELECTIVITY CHECK")
    print("="*40)
    
    for pid in sample_ids:
        # Find full ID
        sid = next((k for k,v in subjects.items() if v['code'] == pid), None)
        if not sid or sid not in functional_results: continue
        
        print(f"\n>> {pid}:")
        res = functional_results[sid]
        first_ses = subjects[sid]['sessions'][0]
        
        for roi_name in sorted(res.keys()):
            if first_ses not in res[roi_name]: continue
            roi_mask = res[roi_name][first_ses]['roi_mask']
            target_cat = roi_name.split('_')[1]
            
            scores = {}
            feat_dir = BASE_DIR / sid / f'ses-{first_ses}' / 'derivatives' / 'fsl' / 'loc' / 'HighLevel.gfeat'
            
            for cat, (cope, mult) in COPE_MAP.items():
                f = feat_dir / f'cope{cope}.feat' / 'stats' / 'zstat1.nii.gz'
                if f.exists():
                    d = nib.load(f).get_fdata() * mult
                    scores[cat] = np.mean(d[roi_mask])
            
            top = max(scores, key=scores.get)
            mark = "✓" if top == target_cat else "✗"
            print(f"  {roi_name:10s}: {mark} Top={top} (Target: {scores.get(target_cat,0):.2f})")
            
verify_roi_selectivity(golarai_functional_final, ANALYSIS_SUBJECTS)


ROI SELECTIVITY CHECK

>> OTC004:
  l_face    : ✓ Top=face (Target: 2.67)
  l_house   : ✓ Top=house (Target: 3.08)
  l_object  : ✓ Top=object (Target: 3.53)
  l_word    : ✓ Top=word (Target: 2.81)

>> control025:
  l_face    : ✓ Top=face (Target: 6.56)
  l_house   : ✓ Top=house (Target: 4.29)
  l_object  : ✓ Top=object (Target: 5.16)
  l_word    : ✓ Top=word (Target: 4.15)
  r_face    : ✓ Top=face (Target: 5.65)
  r_house   : ✓ Top=house (Target: 4.22)
  r_object  : ✓ Top=object (Target: 5.00)
  r_word    : ✓ Top=word (Target: 3.28)


In [22]:
# CELL 5: Hybrid Stability (with Hemisphere tracking)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nibabel as nib
from pathlib import Path

def compute_hybrid_stability(functional_results, subjects, min_voxels=30):
    EXTRACTION_MAP = {'face': 10, 'house': 11, 'object': 3, 'word': 12}
    CATEGORY_TYPES = {
        'face': 'Asymmetric', 'word': 'Asymmetric',
        'house': 'Bilateral', 'object': 'Bilateral'
    }
    
    data = []
    
    for sid, res in functional_results.items():
        if sid not in subjects: continue
        info = subjects[sid]
        
        sessions = sorted(list({s for r in res.values() for s in r.keys()}))
        if len(sessions) < 2: continue
        first_ses = sessions[0]
        
        for roi_name, roi_data in res.items():
            hemi = roi_name.split('_')[0]  # Extract 'l' or 'r'
            cat = roi_name.split('_')[1]
            if first_ses not in roi_data: continue
            
            mask = roi_data[first_ses]['roi_mask']
            if np.sum(mask) < min_voxels: continue
            
            patterns = {}
            valid_extraction = True
            
            for ses in sessions:
                f = BASE_DIR / sid / f'ses-{ses}' / 'derivatives' / 'fsl' / 'loc' / 'HighLevel.gfeat'
                z = f / f'cope{EXTRACTION_MAP[cat]}.feat' / 'stats' / ('zstat1.nii.gz' if ses == first_ses else f'zstat1_ses{first_ses}.nii.gz')
                
                if z.exists():
                    try: patterns[ses] = nib.load(z).get_fdata()[mask]
                    except: valid_extraction = False
                else: valid_extraction = False
            
            if valid_extraction and sessions[-1] in patterns:
                if np.std(patterns[first_ses]) > 0 and np.std(patterns[sessions[-1]]) > 0:
                    corr = np.corrcoef(patterns[first_ses], patterns[sessions[-1]])[0,1]
                    
                    data.append({
                        'Subject': info['code'],
                        'Group': info['group'],
                        'Hemisphere': hemi.upper(),  # ADD THIS
                        'Category': cat.capitalize(),
                        'Type': CATEGORY_TYPES.get(cat, 'Other'), 
                        'Stability (r)': corr
                    })
                
    return pd.DataFrame(data)

df_hybrid = compute_hybrid_stability(golarai_functional_final, ANALYSIS_SUBJECTS)
print(f"✓ Hybrid stability: {len(df_hybrid)} observations")
print(df_hybrid.groupby(['Group', 'Hemisphere']).size())

✓ Hybrid stability: 126 observations
Group    Hemisphere
OTC      L              8
         R             11
control  L             36
         R             35
nonOTC   L             20
         R             16
dtype: int64


In [24]:
# CELL 6: Spatial Drift Analysis (with Hemisphere tracking)
import numpy as np
import pandas as pd

def analyze_drift(functional_results, subjects, min_voxels=30):
    """Calculate spatial drift between first and last sessions"""
    data = []
    
    CATEGORY_TYPES = {
        'face': 'Asymmetric', 'word': 'Asymmetric',
        'house': 'Bilateral', 'object': 'Bilateral'
    }
    
    for sid, res in functional_results.items():
        if sid not in subjects: continue
        info = subjects[sid]
        
        sessions = sorted(list({s for r in res.values() for s in r.keys()}))
        if len(sessions) < 2: continue
        
        first_ses = sessions[0]
        last_ses = sessions[-1]
        
        for roi_name, roi_data in res.items():
            hemi = roi_name.split('_')[0]  # Extract 'l' or 'r'
            cat = roi_name.split('_')[1]
            
            if first_ses not in roi_data or last_ses not in roi_data: continue
            
            # Check voxel counts
            if roi_data[first_ses]['n_voxels'] < min_voxels: continue
            if roi_data[last_ses]['n_voxels'] < min_voxels: continue
            
            # Calculate Euclidean distance between centroids
            c1 = roi_data[first_ses]['centroid']
            c2 = roi_data[last_ses]['centroid']
            drift = np.sqrt(np.sum((c2 - c1)**2))
            
            data.append({
                'Subject': info['code'],
                'Group': info['group'],
                'Hemisphere': hemi.upper(),  # ADD THIS
                'Category': cat.capitalize(),
                'Type': CATEGORY_TYPES.get(cat, 'Other'),
                'Drift (mm)': drift
            })
    
    return pd.DataFrame(data)

df_drift = analyze_drift(golarai_functional_final, ANALYSIS_SUBJECTS, min_voxels=50)
print(f"✓ Spatial drift: {len(df_drift)} observations")
print(df_drift.groupby(['Group', 'Hemisphere']).size())

✓ Spatial drift: 122 observations
Group    Hemisphere
OTC      L              7
         R             10
control  L             36
         R             33
nonOTC   L             20
         R             16
dtype: int64


In [25]:
# CELL 7: RDM Stability Analysis (with Hemisphere tracking)
import numpy as np
import pandas as pd
import nibabel as nib
from scipy.stats import pearsonr

def analyze_rdm(functional_results, subjects, min_voxels=30):
    """Calculate RDM stability between first and last sessions"""
    
    EXTRACTION_MAP = {'face': 10, 'house': 11, 'object': 3, 'word': 12}
    CATEGORY_TYPES = {
        'face': 'Asymmetric', 'word': 'Asymmetric',
        'house': 'Bilateral', 'object': 'Bilateral'
    }
    
    data = []
    
    for sid, res in functional_results.items():
        if sid not in subjects: continue
        info = subjects[sid]
        
        sessions = sorted(list({s for r in res.values() for s in r.keys()}))
        if len(sessions) < 2: continue
        
        first_ses = sessions[0]
        last_ses = sessions[-1]
        
        for roi_name, roi_data in res.items():
            hemi = roi_name.split('_')[0]  # Extract 'l' or 'r'
            cat = roi_name.split('_')[1]
            
            if first_ses not in roi_data or last_ses not in roi_data: continue
            
            # Check voxel threshold
            if roi_data[first_ses]['n_voxels'] < min_voxels: continue
            if roi_data[last_ses]['n_voxels'] < min_voxels: continue
            
            # Extract RDMs for both sessions
            rdms = {}
            for ses in [first_ses, last_ses]:
                mask = roi_data[ses]['roi_mask']
                
                # Extract all 4 categories
                patterns = []
                valid = True
                
                for c in ['face', 'house', 'object', 'word']:
                    feat_dir = BASE_DIR / sid / f'ses-{ses}' / 'derivatives' / 'fsl' / 'loc' / 'HighLevel.gfeat'
                    z_name = 'zstat1.nii.gz' if ses == first_ses else f'zstat1_ses{first_ses}.nii.gz'
                    z_file = feat_dir / f'cope{EXTRACTION_MAP[c]}.feat' / 'stats' / z_name
                    
                    if z_file.exists():
                        try:
                            z_data = nib.load(z_file).get_fdata()[mask]
                            patterns.append(z_data)
                        except:
                            valid = False
                            break
                    else:
                        valid = False
                        break
                
                if not valid or len(patterns) != 4: continue
                
                # Compute RDM (correlation matrix)
                try:
                    corr_matrix = np.corrcoef(patterns)
                    rdm = 1 - corr_matrix
                    rdms[ses] = rdm
                except:
                    continue
            
            # Calculate RDM stability
            if len(rdms) == 2:
                rdm1 = rdms[first_ses]
                rdm2 = rdms[last_ses]
                
                # Flatten upper triangle
                triu_idx = np.triu_indices(4, k=1)
                rdm1_flat = rdm1[triu_idx]
                rdm2_flat = rdm2[triu_idx]
                
                if len(rdm1_flat) > 0 and len(rdm2_flat) > 0:
                    r, _ = pearsonr(rdm1_flat, rdm2_flat)
                    
                    data.append({
                        'Subject': info['code'],
                        'Group': info['group'],
                        'Hemisphere': hemi.upper(),  # ADD THIS
                        'Category': cat.capitalize(),
                        'Type': CATEGORY_TYPES.get(cat, 'Other'),
                        'RDM Stability (r)': r
                    })
    
    return pd.DataFrame(data)

print("Calculating RDM Stability (Geometry Preservation)...")
df_rdm = analyze_rdm(golarai_functional_final, ANALYSIS_SUBJECTS, min_voxels=50)
print(f"✓ RDM stability: {len(df_rdm)} observations")
print(df_rdm.groupby(['Group', 'Hemisphere']).size())

Calculating RDM Stability (Geometry Preservation)...
✓ RDM stability: 122 observations
Group    Hemisphere
OTC      L              7
         R             10
control  L             36
         R             33
nonOTC   L             20
         R             16
dtype: int64


In [26]:
# UPDATE RESULTS.CSV WITH STABILITY METRICS (HEMISPHERE-AWARE)

print("="*80)
print("UPDATING RESULTS.CSV WITH STABILITY METRICS")
print("="*80)

results_path = '/user_data/csimmon2/git_repos/long_pt/B_analyses/results.csv'
df_results = pd.read_csv(results_path)

print(f"Current results.csv: {len(df_results)} rows")

n_updated = {'Hybrid': 0, 'Drift': 0, 'RDM': 0}

for idx, row in df_results.iterrows():
    subject = row['Subject']
    category = row['Category']
    group = row['Group']
    
    # For controls, match by hemisphere too
    if group == 'control':
        hemi = row['nonpt_hemi']  # 'L' or 'R'
        
        # Hybrid
        match = df_hybrid[(df_hybrid['Subject'] == subject) & 
                          (df_hybrid['Category'] == category) &
                          (df_hybrid['Hemisphere'] == hemi)]
        if len(match) > 0:
            df_results.at[idx, 'Hybrid_Stability'] = match.iloc[0]['Stability (r)']
            n_updated['Hybrid'] += 1
        
        # Drift
        match = df_drift[(df_drift['Subject'] == subject) & 
                         (df_drift['Category'] == category) &
                         (df_drift['Hemisphere'] == hemi)]
        if len(match) > 0:
            df_results.at[idx, 'Spatial_Drift_mm'] = match.iloc[0]['Drift (mm)']
            n_updated['Drift'] += 1
        
        # RDM
        match = df_rdm[(df_rdm['Subject'] == subject) & 
                       (df_rdm['Category'] == category) &
                       (df_rdm['Hemisphere'] == hemi)]
        if len(match) > 0:
            df_results.at[idx, 'RDM_Stability'] = match.iloc[0]['RDM Stability (r)']
            n_updated['RDM'] += 1
    
    # For patients, match by subject and category only
    else:
        # Hybrid
        match = df_hybrid[(df_hybrid['Subject'] == subject) & 
                          (df_hybrid['Category'] == category)]
        if len(match) > 0:
            df_results.at[idx, 'Hybrid_Stability'] = match.iloc[0]['Stability (r)']
            n_updated['Hybrid'] += 1
        
        # Drift
        match = df_drift[(df_drift['Subject'] == subject) & 
                         (df_drift['Category'] == category)]
        if len(match) > 0:
            df_results.at[idx, 'Spatial_Drift_mm'] = match.iloc[0]['Drift (mm)']
            n_updated['Drift'] += 1
        
        # RDM
        match = df_rdm[(df_rdm['Subject'] == subject) & 
                       (df_rdm['Category'] == category)]
        if len(match) > 0:
            df_results.at[idx, 'RDM_Stability'] = match.iloc[0]['RDM Stability (r)']
            n_updated['RDM'] += 1

# Save
df_results.to_csv(results_path, index=False)

print(f"\n✓ Updated results.csv")
print(f"  Hybrid Stability: {n_updated['Hybrid']} rows updated")
print(f"  Spatial Drift:    {n_updated['Drift']} rows updated")
print(f"  RDM Stability:    {n_updated['RDM']} rows updated")

# Check completeness
df_check = pd.read_csv(results_path)
print("\n" + "="*80)
print("FINAL DATA COMPLETENESS CHECK")
print("="*80)

for group in ['OTC', 'nonOTC', 'control']:
    group_data = df_check[df_check['Group'] == group]
    print(f"\n{group} (n={len(group_data)} rows):")
    for metric in ['Hybrid_Stability', 'Spatial_Drift_mm', 'RDM_Stability', 'Liu_Distinctiveness']:
        n_missing = group_data[metric].isna().sum()
        pct = 100 * n_missing / len(group_data)
        print(f"  {metric:25s}: {n_missing:3d} missing ({pct:5.1f}%)")

print("\n" + "="*80)

UPDATING RESULTS.CSV WITH STABILITY METRICS
Current results.csv: 127 rows

✓ Updated results.csv
  Hybrid Stability: 125 rows updated
  Spatial Drift:    121 rows updated
  RDM Stability:    121 rows updated

FINAL DATA COMPLETENESS CHECK

OTC (n=19 rows):
  Hybrid_Stability         :   1 missing (  5.3%)
  Spatial_Drift_mm         :   3 missing ( 15.8%)
  RDM_Stability            :   1 missing (  5.3%)
  Liu_Distinctiveness      :   0 missing (  0.0%)

nonOTC (n=36 rows):
  Hybrid_Stability         :   0 missing (  0.0%)
  Spatial_Drift_mm         :   0 missing (  0.0%)
  RDM_Stability            :   0 missing (  0.0%)
  Liu_Distinctiveness      :   0 missing (  0.0%)

control (n=72 rows):
  Hybrid_Stability         :   0 missing (  0.0%)
  Spatial_Drift_mm         :   1 missing (  1.4%)
  RDM_Stability            :   1 missing (  1.4%)
  Liu_Distinctiveness      :   0 missing (  0.0%)



# Playground Below

In [9]:
Don't Run

# CELL 8: Print Full Data Tables and export to csv
import pandas as pd

pd.set_option('display.max_rows', None)
pd.set_option('display.width', 1000)

print("\n" + "="*80)
print("DATA TABLE 1: HYBRID STABILITY (Local Persistence)")
print("="*80)
if 'df_hybrid' in locals():
    print(df_hybrid.sort_values(by=['Group', 'Category', 'Subject']).to_string(index=False))

print("\n" + "="*80)
print("DATA TABLE 2: DRIFT ANALYSIS (Physical Movement)")
print("="*80)
if 'df_drift' in locals():
    print(df_drift.sort_values(by=['Group', 'Category', 'Subject']).to_string(index=False))

print("\n" + "="*80)
print("DATA TABLE 3: RDM STABILITY (Geometry)")
print("="*80)
if 'df_rdm' in locals():
    print(df_rdm.sort_values(by=['Group', 'Type', 'Subject']).to_string(index=False))
    

# ============================================================================
# FINAL ANALYSIS: Exclude sub-079 and update results.csv
# ============================================================================

print("="*80)
print("GENERATING CLEAN STABILITY METRICS (excluding sub-079)")
print("="*80)

# Exclude sub-079 (missing timing files for ses-02)
subjects_clean = {k: v for k, v in ANALYSIS_SUBJECTS.items() if k != 'sub-079'}
golarai_functional_clean = {k: v for k, v in golarai_functional_final.items() if k != 'sub-079'}

print(f"Subjects: {len(subjects_clean)} (excluded sub-079)\n")

# Calculate all three metrics
df_hybrid = compute_hybrid_stability(golarai_functional_clean, subjects_clean, min_voxels=50)
df_drift = analyze_drift(golarai_functional_clean, subjects_clean)
df_rdm = analyze_rdm(golarai_functional_clean, subjects_clean)

# Show summaries
print("\nHybrid Stability:")
print(df_hybrid.groupby(['Group', 'Type'])['Stability (r)'].agg(['mean', 'count']).unstack())

print("\nSpatial Drift:")
print(df_drift.groupby(['Group'])['Drift (mm)'].agg(['mean']).unstack())

print("\nRDM Stability:")
print(df_rdm.groupby(['Group', 'Type'])['RDM Stability (r)'].agg(['mean', 'count']).unstack())

# Export to results.csv format
stability_records = []

for subject_id in subjects_clean.keys():
    code = ANALYSIS_SUBJECTS[subject_id]['code']
    group = ANALYSIS_SUBJECTS[subject_id]['group']
    
    # Add each metric
    for _, row in df_hybrid[df_hybrid['Subject'] == code].iterrows():
        stability_records.append({
            'Subject': subject_id, 'Group': group, 'Category': row['Category'],
            'Type': row['Type'], 'Metric': 'Hybrid_Stability', 'Value': row['Stability (r)']
        })
    
    for _, row in df_drift[df_drift['Subject'] == code].iterrows():
        stability_records.append({
            'Subject': subject_id, 'Group': group, 'Category': row['Category'],
            'Type': row['Type'], 'Metric': 'Spatial_Drift', 'Value': row['Drift (mm)']
        })
    
    for _, row in df_rdm[df_rdm['Subject'] == code].iterrows():
        stability_records.append({
            'Subject': subject_id, 'Group': group, 'Category': row['Category'],
            'Type': row['Type'], 'Metric': 'RDM_Stability', 'Value': row['RDM Stability (r)']
        })

df_stability = pd.DataFrame(stability_records)

# Save
output_path = '/user_data/csimmon2/git_repos/long_pt/B_analyses/stability_metrics.csv'
df_stability.to_csv(output_path, index=False)

print(f"\n✓ Saved: {output_path}")
print(f"  Rows: {len(df_stability)}, Subjects: {df_stability['Subject'].nunique()}")
print("="*80)


# Remove OTC079 from results CSV - due to only one session available

import pandas as pd

# Load the CSV
results_path = '/user_data/csimmon2/git_repos/long_pt/B_analyses/results.csv'
df_results = pd.read_csv(results_path)

# Remove OTC079
df_results_clean = df_results[df_results['Subject'] != 'OTC079'].copy()

# Save
df_results_clean.to_csv(results_path, index=False)

print(f"✓ Removed OTC079")
print(f"  Original rows: {len(df_results)}")
print(f"  Clean rows: {len(df_results_clean)}")
print(f"  Removed: {len(df_results) - len(df_results_clean)} rows")

SyntaxError: EOL while scanning string literal (1115689294.py, line 1)

In [29]:
# ============================================================================
# COMPREHENSIVE STATISTICAL ANALYSIS PLAN
# ============================================================================

import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

# Load data
results_path = '/user_data/csimmon2/git_repos/long_pt/B_analyses/results.csv'
df = pd.read_csv(results_path)

print("="*80)
print("COMPREHENSIVE STATISTICAL ANALYSIS")
print("="*80)

# ============================================================================
# 1. PRIMARY HYPOTHESIS: OTC BILATERAL vs UNILATERAL
# ============================================================================

print("\n1. PRIMARY HYPOTHESIS: OTC Bilateral vs Unilateral")
print("-"*80)

otc = df[df['Group'] == 'OTC'].copy()

metrics = {
    'Hybrid_Stability': 'lower = more change',
    'RDM_Stability': 'lower = more change', 
    'Spatial_Drift_mm': 'higher = more change',
    'Liu_Distinctiveness': 'higher = more specialization'
}

primary_results = []

for metric, interpretation in metrics.items():
    bilateral = otc[otc['Category_Type'] == 'Bilateral'][metric].dropna()
    unilateral = otc[otc['Category_Type'] == 'Unilateral'][metric].dropna()
    
    if len(bilateral) > 0 and len(unilateral) > 0:
        t_stat, p_val = stats.ttest_ind(bilateral, unilateral)
        
        primary_results.append({
            'Metric': metric,
            'Bilateral_M': bilateral.mean(),
            'Unilateral_M': unilateral.mean(),
            'Difference': bilateral.mean() - unilateral.mean(),
            't': t_stat,
            'p': p_val,
            'Interpretation': interpretation
        })
        
        print(f"\n{metric}:")
        print(f"  Bilateral:  M={bilateral.mean():.3f}, SD={bilateral.std():.3f}, n={len(bilateral)}")
        print(f"  Unilateral: M={unilateral.mean():.3f}, SD={unilateral.std():.3f}, n={len(unilateral)}")
        print(f"  t({len(bilateral)+len(unilateral)-2}) = {t_stat:.3f}, p = {p_val:.4f}")
        print(f"  → {interpretation}")
        if p_val < 0.05:
            print("  ✓ SIGNIFICANT")

df_primary = pd.DataFrame(primary_results)

# ============================================================================
# 2. GROUP × CATEGORY TYPE INTERACTION
# ============================================================================

print("\n\n2. GROUP × CATEGORY TYPE INTERACTION")
print("-"*80)
print("Testing if OTC bilateral-unilateral difference is LARGER than controls/nonOTC")

interaction_results = []

for metric in metrics.keys():
    print(f"\n{metric}:")
    
    # Prepare data for ANOVA
    data_clean = df[df[metric].notna()][['Group', 'Category_Type', metric]].copy()
    
    # 2-way ANOVA
    formula = f'{metric} ~ C(Group) + C(Category_Type) + C(Group):C(Category_Type)'
    model = ols(formula, data=data_clean).fit()
    anova_table = anova_lm(model, typ=2)
    
    print(anova_table)
    
    # Extract interaction effect
    interaction_p = anova_table.loc['C(Group):C(Category_Type)', 'PR(>F)']
    
    interaction_results.append({
        'Metric': metric,
        'Interaction_p': interaction_p,
        'Significant': interaction_p < 0.05
    })
    
    if interaction_p < 0.05:
        print(f"  ✓ SIGNIFICANT INTERACTION - Group effect differs by category type")

df_interaction = pd.DataFrame(interaction_results)

# ============================================================================
# 3. CATEGORY-SPECIFIC ANALYSES
# ============================================================================

print("\n\n3. CATEGORY-SPECIFIC ANALYSES")
print("-"*80)
print("Do individual categories show OTC > control/nonOTC effects?")

category_results = []

for category in ['Face', 'Word', 'House', 'Object']:
    print(f"\n{category}:")
    
    for metric in metrics.keys():
        otc_cat = df[(df['Group'] == 'OTC') & (df['Category'] == category)][metric].dropna()
        ctrl_cat = df[(df['Group'] == 'control') & (df['Category'] == category)][metric].dropna()
        nonotc_cat = df[(df['Group'] == 'nonOTC') & (df['Category'] == category)][metric].dropna()
        
        if len(otc_cat) > 0 and len(ctrl_cat) > 0:
            t_otc_ctrl, p_otc_ctrl = stats.ttest_ind(otc_cat, ctrl_cat)
            
            category_results.append({
                'Category': category,
                'Metric': metric,
                'OTC_M': otc_cat.mean(),
                'Control_M': ctrl_cat.mean(),
                'Difference': otc_cat.mean() - ctrl_cat.mean(),
                't': t_otc_ctrl,
                'p': p_otc_ctrl
            })
            
            print(f"  {metric}: OTC={otc_cat.mean():.3f}, Ctrl={ctrl_cat.mean():.3f}, t={t_otc_ctrl:.2f}, p={p_otc_ctrl:.3f}")

df_category = pd.DataFrame(category_results)

# ============================================================================
# 4. COVARIATE ANALYSES (Age, Scan Gap)
# ============================================================================

print("\n\n4. COVARIATE ANALYSES")
print("-"*80)

# Get one row per subject
subjects_unique = df.drop_duplicates('Subject')

print("\nCorrelations with Age (age_1):")
for group in ['OTC', 'nonOTC', 'control']:
    group_subj = subjects_unique[subjects_unique['Group'] == group]
    
    # Average metrics across categories for each subject
    subj_metrics = []
    for subject in group_subj['Subject'].unique():
        subj_data = df[df['Subject'] == subject]
        subj_metrics.append({
            'Subject': subject,
            'age_1': subj_data['age_1'].iloc[0],
            'scan_gap_years': subj_data['scan_gap_years'].iloc[0],
            'Hybrid_mean': subj_data['Hybrid_Stability'].mean(),
            'RDM_mean': subj_data['RDM_Stability'].mean(),
            'Drift_mean': subj_data['Spatial_Drift_mm'].mean(),
            'Liu_mean': subj_data['Liu_Distinctiveness'].mean()
        })
    
    df_subj = pd.DataFrame(subj_metrics)
    
    print(f"\n{group}:")
    for metric in ['Hybrid_mean', 'RDM_mean', 'Drift_mean', 'Liu_mean']:
        r, p = stats.pearsonr(df_subj['age_1'].dropna(), df_subj[metric].dropna())
        print(f"  Age × {metric:15s}: r={r:.3f}, p={p:.3f}")

print("\n\nCorrelations with Scan Gap:")
for group in ['OTC', 'nonOTC', 'control']:
    df_subj = subjects_unique[subjects_unique['Group'] == group]
    print(f"\n{group}: n={len(df_subj)}, gap range={df_subj['scan_gap_years'].min():.2f}-{df_subj['scan_gap_years'].max():.2f}")

# ============================================================================
# 5. HEMISPHERE-SPECIFIC COMPARISONS
# ============================================================================

print("\n\n5. HEMISPHERE-SPECIFIC COMPARISONS")
print("-"*80)

print("\nWord (OTC vs LEFT hemisphere controls only):")
# Implementation needed - see next section

print("\nFace (OTC vs RIGHT hemisphere controls only):")  
# Implementation needed - see next section

print("\n" + "="*80)
print("ANALYSIS COMPLETE")
print("="*80)

COMPREHENSIVE STATISTICAL ANALYSIS

1. PRIMARY HYPOTHESIS: OTC Bilateral vs Unilateral
--------------------------------------------------------------------------------

Hybrid_Stability:
  Bilateral:  M=0.453, SD=0.192, n=10
  Unilateral: M=0.499, SD=0.282, n=8
  t(16) = -0.403, p = 0.6923
  → lower = more change

RDM_Stability:
  Bilateral:  M=0.245, SD=0.389, n=10
  Unilateral: M=0.315, SD=0.474, n=8
  t(16) = -0.344, p = 0.7350
  → lower = more change

Spatial_Drift_mm:
  Bilateral:  M=6.386, SD=4.119, n=9
  Unilateral: M=11.352, SD=10.750, n=7
  t(14) = -1.280, p = 0.2212
  → higher = more change

Liu_Distinctiveness:
  Bilateral:  M=0.368, SD=0.240, n=10
  Unilateral: M=0.159, SD=0.111, n=9
  t(17) = 2.397, p = 0.0283
  → higher = more specialization
  ✓ SIGNIFICANT


2. GROUP × CATEGORY TYPE INTERACTION
--------------------------------------------------------------------------------
Testing if OTC bilateral-unilateral difference is LARGER than controls/nonOTC

Hybrid_Stability:
 