In [5]:
# =============================================================================
# CELL 1: Setup and Configuration
# =============================================================================
import pandas as pd
from pathlib import Path
import nibabel as nib
import numpy as np
from scipy.ndimage import label, center_of_mass
from scipy.stats import pearsonr, ttest_ind, f_oneway, ttest_1samp
import matplotlib.pyplot as plt
import seaborn as sns

# Paths
CSV_FILE = Path('/user_data/csimmon2/git_repos/long_pt/long_pt_sub_info.csv')
BASE_DIR = Path("/user_data/csimmon2/long_pt")

# Session adjustments
SESSION_START = {'sub-010': 2, 'sub-018': 2, 'sub-068': 2}

# Contrast definitions
COPE_MAP_DIFFERENTIAL = {
    'face': (10, 1),
    'word': (13, -1),
    'object': (3, 1),
    'house': (11, 1)
}

# Exclusions
EXCLUDE_SUBS = ['sub-025', 'sub-027', 'sub-045', 'sub-072']

# Load subject info
df = pd.read_csv(CSV_FILE)

print("✓ Cell 1 complete")

✓ Cell 1 complete


In [6]:
# =============================================================================
# CELL 2: Load Subjects by Group
# =============================================================================
def load_subjects_by_group(group_filter=None, patient_only=True):
    filtered_df = df.copy()
    
    if patient_only is True:
        filtered_df = filtered_df[filtered_df['patient'] == 1]
    elif patient_only is False:
        filtered_df = filtered_df[filtered_df['patient'] == 0]
    
    if group_filter:
        if isinstance(group_filter, str):
            group_filter = [group_filter]
        filtered_df = filtered_df[filtered_df['group'].isin(group_filter)]
    
    subjects = {}
    for _, row in filtered_df.iterrows():
        subject_id = row['sub']
        subj_dir = BASE_DIR / subject_id
        if not subj_dir.exists():
            continue
        
        sessions = sorted([d.name.replace('ses-', '') for d in subj_dir.glob('ses-*') if d.is_dir()], key=int)
        start_session = SESSION_START.get(subject_id, 1)
        sessions = [s for s in sessions if int(s) >= start_session]
        if not sessions:
            continue
        
        hemisphere = 'l' if row.get('intact_hemi', 'left') == 'left' else 'r'
        
        subjects[subject_id] = {
            'code': f"{row['group']}{subject_id.split('-')[1]}",
            'sessions': sessions,
            'hemi': hemisphere,
            'group': row['group'],
            'patient_status': 'patient' if row['patient'] == 1 else 'control',
            'surgery_side': row.get('SurgerySide', None)
        }
    return subjects

ALL_PATIENTS = load_subjects_by_group(patient_only=True)
ALL_CONTROLS = load_subjects_by_group(patient_only=False)
ANALYSIS_SUBJECTS = {**ALL_PATIENTS, **ALL_CONTROLS}

print(f"✓ Loaded {len(ANALYSIS_SUBJECTS)} subjects")
for g in ['OTC', 'nonOTC', 'control']:
    n = sum(1 for v in ANALYSIS_SUBJECTS.values() if v['group'] == g)
    print(f"  {g}: {n}")

✓ Loaded 25 subjects
  OTC: 7
  nonOTC: 9
  control: 9


In [7]:
# =============================================================================
# CELL 3: SPATIAL DRIFT - Peak-Based Extraction (Golarai Method)
# =============================================================================
# Following Golarai et al. (2015): Track peak voxel location across sessions
# Drift = Euclidean distance between T1 and T2 peak coordinates (mm)
# =============================================================================

def extract_peak_locations(subject_id, cope_map):
    """Extract peak voxel (max T-value) locations within search mask for each session"""
    
    info = ANALYSIS_SUBJECTS[subject_id]
    roi_dir = BASE_DIR / subject_id / f'ses-{info["sessions"][0]}' / 'ROIs'
    if not roi_dir.exists(): 
        return {}
    
    all_results = {}
    first_session = info['sessions'][0]

    for hemi in ['l', 'r']:
        for category, (cope_num, multiplier) in cope_map.items():
            
            mask_file = roi_dir / f'{hemi}_{category}_searchmask.nii.gz'
            if not mask_file.exists(): 
                continue
            
            try:
                search_mask_img = nib.load(mask_file)
                search_mask = search_mask_img.get_fdata() > 0
                affine = search_mask_img.affine
            except: 
                continue
            
            hemi_key = f'{hemi}_{category}'
            all_results[hemi_key] = {}
            
            for session in info['sessions']:
                feat_dir = BASE_DIR / subject_id / f'ses-{session}' / 'derivatives' / 'fsl' / 'loc' / 'HighLevel.gfeat'
                z_name = 'zstat1.nii.gz' if session == first_session else f'zstat1_ses{first_session}.nii.gz'
                cope_file = feat_dir / f'cope{cope_num}.feat' / 'stats' / z_name
                
                if not cope_file.exists(): 
                    continue
                
                try:
                    z_data = nib.load(cope_file).get_fdata() * multiplier
                    z_masked = np.where(search_mask & (z_data > 0), z_data, -np.inf)
                    peak_idx = np.unravel_index(np.argmax(z_masked), z_masked.shape)
                    peak_z = z_data[peak_idx]
                    
                    if peak_z <= 0:
                        continue
                    
                    peak_mni = nib.affines.apply_affine(affine, peak_idx)
                    
                    all_results[hemi_key][session] = {
                        'peak_idx': peak_idx,
                        'peak_mni': peak_mni,
                        'peak_z': peak_z
                    }
                except Exception as e:
                    print(f"Error {subject_id} {hemi_key} ses-{session}: {e}")
                    
    return all_results


def compute_peak_drift(peak_results, subjects_dict):
    """Compute Euclidean distance between T1 and T2 peak locations (mm)"""
    
    results = []
    
    for sid, rois in peak_results.items():
        if sid in EXCLUDE_SUBS:
            continue
        info = subjects_dict.get(sid, {})
        
        for roi_key, sessions_data in rois.items():
            sessions = sorted(sessions_data.keys())
            if len(sessions) < 2:
                continue
            
            peak_t1 = np.array(sessions_data[sessions[0]]['peak_mni'])
            peak_t2 = np.array(sessions_data[sessions[-1]]['peak_mni'])
            drift_mm = np.linalg.norm(peak_t2 - peak_t1)
            
            t1_z = sessions_data[sessions[0]]['peak_z']
            hemi = roi_key.split('_')[0]
            category = roi_key.split('_')[1]
            
            results.append({
                'subject': sid,
                'code': subjects_dict[sid].get('code', sid),
                'group': subjects_dict[sid].get('group', 'unknown'),
                'hemi': hemi,
                'category': category,
                'category_type': 'Bilateral' if category in ['object', 'house'] else 'Unilateral',
                'peak_drift_mm': drift_mm,
                't1_peak_z': t1_z,
                't2_peak_z': sessions_data[sessions[-1]]['peak_z'],
                'flag': 'WEAK_SIGNAL' if t1_z < 2.3 else ''
            })
    
    return pd.DataFrame(results)


# Run extraction
print("Extracting Peak Locations (Golarai method)...")
peak_locations = {}
for sub in ANALYSIS_SUBJECTS:
    if sub not in EXCLUDE_SUBS:
        res = extract_peak_locations(sub, COPE_MAP_DIFFERENTIAL)
        if res: 
            peak_locations[sub] = res

print("Computing Peak-Based Drift...")
drift_peak = compute_peak_drift(peak_locations, ANALYSIS_SUBJECTS)

# Add hemisphere info
drift_peak['intact_hemi'] = drift_peak['subject'].map(lambda s: ANALYSIS_SUBJECTS[s]['hemi'])

print(f"\n✓ {drift_peak['subject'].nunique()} subjects, {len(drift_peak)} measurements")
print(f"  Excluded: {EXCLUDE_SUBS}")

# Save
drift_peak.to_csv('drift_peak_golarai.csv', index=False)
print("✓ Saved to drift_peak_golarai.csv")

Extracting Peak Locations (Golarai method)...
Computing Peak-Based Drift...

✓ 20 subjects, 108 measurements
  Excluded: ['sub-025', 'sub-027', 'sub-045', 'sub-072']
✓ Saved to drift_peak_golarai.csv


In [23]:
# =============================================================================
# CELL 4 (UPDATED): SPATIAL DRIFT - Descriptives and Statistical Tests
# =============================================================================
# Fixed hemisphere selection:
#   OTC/nonOTC: intact hemisphere only
#   Controls unilateral: face=R, word=L
#   Controls bilateral: match to OTC intact hemisphere (separate by hemi)
#   nonOTC bilateral: intact hemisphere only
# =============================================================================

df_drift = drift_peak.copy()

# --- UNILATERAL CATEGORIES ---
# OTC/nonOTC: intact hemisphere only
otc_uni = df_drift[(df_drift['group'] == 'OTC') & 
                   (df_drift['hemi'] == df_drift['intact_hemi']) &
                   (df_drift['category'].isin(['face', 'word']))]

nonotc_uni = df_drift[(df_drift['group'] == 'nonOTC') & 
                      (df_drift['hemi'] == df_drift['intact_hemi']) &
                      (df_drift['category'].isin(['face', 'word']))]

# Controls: face=R only, word=L only (NOT pooled)
ctrl = df_drift[df_drift['group'] == 'control']
ctrl_face = ctrl[(ctrl['category'] == 'face') & (ctrl['hemi'] == 'r')]
ctrl_word = ctrl[(ctrl['category'] == 'word') & (ctrl['hemi'] == 'l')]

# --- BILATERAL CATEGORIES ---
# OTC/nonOTC: intact hemisphere only
otc_bil = df_drift[(df_drift['group'] == 'OTC') & 
                   (df_drift['hemi'] == df_drift['intact_hemi']) &
                   (df_drift['category'].isin(['object', 'house']))]

nonotc_bil = df_drift[(df_drift['group'] == 'nonOTC') & 
                      (df_drift['hemi'] == df_drift['intact_hemi']) &
                      (df_drift['category'].isin(['object', 'house']))]

# Controls bilateral: separate by hemisphere to match OTC
ctrl_bil_L = ctrl[(ctrl['category'].isin(['object', 'house'])) & (ctrl['hemi'] == 'l')]
ctrl_bil_R = ctrl[(ctrl['category'].isin(['object', 'house'])) & (ctrl['hemi'] == 'r')]

# ============================================================
# DESCRIPTIVES - UNILATERAL (by category)
# ============================================================
print("=" * 85)
print("UNILATERAL CATEGORIES - By Category and Hemisphere")
print("=" * 85)

print(f"\n{'Category':<10} {'Group':<15} {'Hemisphere':<12} {'n':>4} {'Mean±SD (mm)':>18}")
print("-" * 65)

# Faces
otc_face = otc_uni[otc_uni['category'] == 'face']
nonotc_face = nonotc_uni[nonotc_uni['category'] == 'face']
print(f"{'Face':<10} {'OTC':<15} {'intact':<12} {len(otc_face):>4} {otc_face['peak_drift_mm'].mean():>7.1f} ± {otc_face['peak_drift_mm'].std():.1f}")
print(f"{'Face':<10} {'nonOTC':<15} {'intact':<12} {len(nonotc_face):>4} {nonotc_face['peak_drift_mm'].mean():>7.1f} ± {nonotc_face['peak_drift_mm'].std():.1f}")
print(f"{'Face':<10} {'Control':<15} {'R only':<12} {len(ctrl_face):>4} {ctrl_face['peak_drift_mm'].mean():>7.1f} ± {ctrl_face['peak_drift_mm'].std():.1f}")

# Words
otc_word = otc_uni[otc_uni['category'] == 'word']
nonotc_word = nonotc_uni[nonotc_uni['category'] == 'word']
print(f"{'Word':<10} {'OTC':<15} {'intact':<12} {len(otc_word):>4} {otc_word['peak_drift_mm'].mean():>7.1f} ± {otc_word['peak_drift_mm'].std():.1f}")
print(f"{'Word':<10} {'nonOTC':<15} {'intact':<12} {len(nonotc_word):>4} {nonotc_word['peak_drift_mm'].mean():>7.1f} ± {nonotc_word['peak_drift_mm'].std():.1f}")
print(f"{'Word':<10} {'Control':<15} {'L only':<12} {len(ctrl_word):>4} {ctrl_word['peak_drift_mm'].mean():>7.1f} ± {ctrl_word['peak_drift_mm'].std():.1f}")

# ============================================================
# DESCRIPTIVES - BILATERAL (by hemisphere)
# ============================================================
print("\n" + "=" * 85)
print("BILATERAL CATEGORIES - By Hemisphere")
print("=" * 85)

# Split OTC bilateral by intact hemisphere
otc_bil_L = otc_bil[otc_bil['hemi'] == 'l']
otc_bil_R = otc_bil[otc_bil['hemi'] == 'r']
nonotc_bil_L = nonotc_bil[nonotc_bil['hemi'] == 'l']
nonotc_bil_R = nonotc_bil[nonotc_bil['hemi'] == 'r']

print(f"\n{'Hemisphere':<12} {'Group':<15} {'n':>4} {'Mean±SD (mm)':>18}")
print("-" * 55)
print(f"{'Left':<12} {'OTC':<15} {len(otc_bil_L):>4} {otc_bil_L['peak_drift_mm'].mean():>7.1f} ± {otc_bil_L['peak_drift_mm'].std():.1f}")
print(f"{'Left':<12} {'nonOTC':<15} {len(nonotc_bil_L):>4} {nonotc_bil_L['peak_drift_mm'].mean():>7.1f} ± {nonotc_bil_L['peak_drift_mm'].std():.1f}")
print(f"{'Left':<12} {'Control':<15} {len(ctrl_bil_L):>4} {ctrl_bil_L['peak_drift_mm'].mean():>7.1f} ± {ctrl_bil_L['peak_drift_mm'].std():.1f}")
print(f"{'Right':<12} {'OTC':<15} {len(otc_bil_R):>4} {otc_bil_R['peak_drift_mm'].mean():>7.1f} ± {otc_bil_R['peak_drift_mm'].std():.1f}")
print(f"{'Right':<12} {'nonOTC':<15} {len(nonotc_bil_R):>4} {nonotc_bil_R['peak_drift_mm'].mean():>7.1f} ± {nonotc_bil_R['peak_drift_mm'].std():.1f}")
print(f"{'Right':<12} {'Control':<15} {len(ctrl_bil_R):>4} {ctrl_bil_R['peak_drift_mm'].mean():>7.1f} ± {ctrl_bil_R['peak_drift_mm'].std():.1f}")

print("\n" + "=" * 85)

UNILATERAL CATEGORIES - By Category and Hemisphere

Category   Group           Hemisphere      n       Mean±SD (mm)
-----------------------------------------------------------------
Face       OTC             intact          6     6.6 ± 7.8
Face       nonOTC          intact          7     2.8 ± 5.0
Face       Control         R only          7     1.4 ± 1.0
Word       OTC             intact          6    13.7 ± 12.7
Word       nonOTC          intact          7    10.8 ± 8.1
Word       Control         L only          7    14.2 ± 10.9

BILATERAL CATEGORIES - By Hemisphere

Hemisphere   Group              n       Mean±SD (mm)
-------------------------------------------------------
Left         OTC                4    12.4 ± 7.6
Left         nonOTC             8     2.4 ± 1.8
Left         Control           14    11.5 ± 13.6
Right        OTC                8     9.3 ± 16.5
Right        nonOTC             6     6.9 ± 6.7
Right        Control           14     7.2 ± 6.4



In [9]:
# =============================================================================
# CELL 5: DIRECTIONAL DRIFT - Extract Signed Coordinate Differences
# =============================================================================
# This extracts signed (T2 - T1) differences for each coordinate axis
# to test whether drift is SYSTEMATIC (directional) or just NOISE
# =============================================================================

def extract_directional_drift(peak_results, subjects_dict):
    """
    Extract signed coordinate differences (T2 - T1) for each ROI.
    Returns DataFrame with delta_x, delta_y, delta_z (signed) plus euclidean_dist.
    """
    results = []
    
    for sid, rois in peak_results.items():
        if sid in EXCLUDE_SUBS:
            continue
            
        info = subjects_dict.get(sid, {})
        group = info.get('group', 'control')
        if info.get('patient_status') == 'control': 
            group = 'Control'
        elif group == 'nonOTC':
            group = 'nonOTC'
        elif group == 'OTC':
            group = 'OTC'
        
        for roi_key, sessions_data in rois.items():
            sessions = sorted(sessions_data.keys())
            if len(sessions) < 2:
                continue
            
            hemi = roi_key.split('_')[0]
            cat = roi_key.split('_')[1]
            
            peak_t1 = sessions_data[sessions[0]]['peak_mni']
            peak_t2 = sessions_data[sessions[-1]]['peak_mni']
            
            # Signed differences (T2 - T1)
            dx = peak_t2[0] - peak_t1[0]  # medial-lateral
            dy = peak_t2[1] - peak_t1[1]  # anterior-posterior
            dz = peak_t2[2] - peak_t1[2]  # superior-inferior
            
            # Euclidean distance
            euclidean = np.sqrt(dx**2 + dy**2 + dz**2)
            
            results.append({
                'subject': sid,
                'code': info.get('code', sid),
                'group': group,
                'hemisphere': hemi,
                'category': cat,
                'category_type': 'Bilateral' if cat in ['object', 'house'] else 'Unilateral',
                'intact_hemi': info.get('hemi', 'unknown'),
                'x_T1': peak_t1[0], 'y_T1': peak_t1[1], 'z_T1': peak_t1[2],
                'x_T2': peak_t2[0], 'y_T2': peak_t2[1], 'z_T2': peak_t2[2],
                'delta_x': dx,
                'delta_y': dy,
                'delta_z': dz,
                'euclidean_dist': euclidean,
                't1_peak_z': sessions_data[sessions[0]]['peak_z'],
                't2_peak_z': sessions_data[sessions[-1]]['peak_z'],
                'T1_session': sessions[0],
                'T2_session': sessions[-1]
            })
    
    return pd.DataFrame(results)

# Extract directional drift
dir_drift_df = extract_directional_drift(peak_locations, ANALYSIS_SUBJECTS)

print(f"✓ Extracted {len(dir_drift_df)} ROIs with directional drift data")
print(f"  OTC: {len(dir_drift_df[dir_drift_df['group'] == 'OTC'])}")
print(f"  nonOTC: {len(dir_drift_df[dir_drift_df['group'] == 'nonOTC'])}")
print(f"  Control: {len(dir_drift_df[dir_drift_df['group'] == 'Control'])}")

dir_drift_df.head()

✓ Extracted 108 ROIs with directional drift data
  OTC: 24
  nonOTC: 28
  Control: 56


Unnamed: 0,subject,code,group,hemisphere,category,category_type,intact_hemi,x_T1,y_T1,z_T1,...,y_T2,z_T2,delta_x,delta_y,delta_z,euclidean_dist,t1_peak_z,t2_peak_z,T1_session,T2_session
0,sub-004,OTC004,OTC,l,face,Unilateral,l,-27.5,-32.0,-9.0,...,-52.0,0.0,-4.0,-20.0,9.0,22.293497,3.500539,13.334448,1,6
1,sub-004,OTC004,OTC,l,word,Unilateral,l,-29.5,-19.0,6.0,...,-18.0,1.0,4.0,1.0,-5.0,6.480741,4.249987,2.070664,1,6
2,sub-004,OTC004,OTC,l,object,Bilateral,l,-32.5,-75.0,13.0,...,-66.0,11.0,0.0,9.0,-2.0,9.219544,6.774813,11.288923,1,6
3,sub-004,OTC004,OTC,l,house,Bilateral,l,-19.5,-64.0,8.0,...,-42.0,10.0,-3.0,22.0,2.0,22.293497,5.253273,12.639606,1,6
4,sub-007,nonOTC007,nonOTC,r,face,Unilateral,r,34.5,-41.0,4.0,...,-42.0,5.0,1.0,-1.0,1.0,1.732051,9.558907,14.58206,1,4


In [25]:
# =============================================================================
# CELL 6 (UPDATED): DIRECTIONAL DRIFT - Filter by Appropriate Hemisphere
# =============================================================================
# Fixed: Controls bilateral now separated by hemisphere, nonOTC uses intact hemi
# =============================================================================

def filter_by_hemisphere_corrected(df):
    """Apply corrected hemisphere selection rules"""
    
    # OTC: intact hemisphere only (all categories)
    otc = df[(df['group'] == 'OTC') & (df['hemisphere'] == df['intact_hemi'])]
    
    # nonOTC: intact hemisphere only (all categories)
    nonotc = df[(df['group'] == 'nonOTC') & (df['hemisphere'] == df['intact_hemi'])]
    
    # Controls unilateral: face=R, word=L
    ctrl = df[df['group'] == 'Control']
    ctrl_face = ctrl[(ctrl['category'] == 'face') & (ctrl['hemisphere'] == 'r')]
    ctrl_word = ctrl[(ctrl['category'] == 'word') & (ctrl['hemisphere'] == 'l')]
    
    # Controls bilateral: keep hemisphere info, will match to OTC in comparisons
    # For now include both but tag hemisphere - actual matching happens at comparison time
    ctrl_bilateral = ctrl[ctrl['category'].isin(['object', 'house'])]
    
    ctrl_filtered = pd.concat([ctrl_face, ctrl_word, ctrl_bilateral])
    
    return pd.concat([otc, nonotc, ctrl_filtered])

# Apply corrected filter
dir_drift_filtered = filter_by_hemisphere_corrected(dir_drift_df)

# Add surgery_side for reorganization analysis
surgery_map = df.set_index('sub')['SurgerySide'].to_dict()
dir_drift_filtered['surgery_side'] = dir_drift_filtered['subject'].map(surgery_map)

# Add reorganization status
def get_reorg_status(row):
    if row['group'] != 'OTC':
        return 'N/A'
    if row['category'] not in ['face', 'word']:
        return 'N/A'
    if row['surgery_side'] == 'left' and row['category'] == 'word':
        return 'Reorganizing'
    elif row['surgery_side'] == 'right' and row['category'] == 'face':
        return 'Reorganizing'
    else:
        return 'Typical'

dir_drift_filtered['reorg_status'] = dir_drift_filtered.apply(get_reorg_status, axis=1)

# Summary
print("✓ After corrected hemisphere filtering:")
print(f"  OTC: {len(dir_drift_filtered[dir_drift_filtered['group'] == 'OTC'])}")
print(f"  nonOTC: {len(dir_drift_filtered[dir_drift_filtered['group'] == 'nonOTC'])}")
print(f"  Control unilateral: {len(dir_drift_filtered[(dir_drift_filtered['group'] == 'Control') & (dir_drift_filtered['category'].isin(['face', 'word']))])}")
print(f"  Control bilateral: {len(dir_drift_filtered[(dir_drift_filtered['group'] == 'Control') & (dir_drift_filtered['category'].isin(['object', 'house']))])}")

✓ After corrected hemisphere filtering:
  OTC: 24
  nonOTC: 28
  Control unilateral: 14
  Control bilateral: 28


In [11]:
# =============================================================================
# CELL 7: DIRECTIONAL DRIFT - Statistical Tests for Systematic Drift
# =============================================================================
# H0: Mean drift = 0 (peaks fluctuate randomly around stable location)
# If p > 0.05 → No systematic drift (CONSISTENT with Dehaene-Lambertz)
# =============================================================================

def test_systematic_drift(df, group_col='group'):
    """
    Test whether drift is systematic (mean ≠ 0) for each group.
    """
    results = []
    coord_labels = {'delta_x': 'Medial-Lateral', 'delta_y': 'Anterior-Posterior', 'delta_z': 'Superior-Inferior'}
    
    for group in df[group_col].unique():
        group_data = df[df[group_col] == group]
        
        for coord in ['delta_x', 'delta_y', 'delta_z']:
            values = group_data[coord].dropna()
            n = len(values)
            
            if n < 3:
                continue
            
            # One-sample t-test: H0 = mean is 0
            t_stat, p_val = ttest_1samp(values, 0)
            
            # Effect size (Cohen's d)
            cohens_d = values.mean() / values.std() if values.std() > 0 else 0
            
            # 95% CI
            sem = values.sem()
            ci_low = values.mean() - 1.96 * sem
            ci_high = values.mean() + 1.96 * sem
            
            results.append({
                'Group': group,
                'Axis': coord_labels[coord],
                'n': n,
                'Mean (mm)': round(values.mean(), 2),
                'SD': round(values.std(), 2),
                '95% CI': f"[{ci_low:.1f}, {ci_high:.1f}]",
                't': round(t_stat, 2),
                'p': round(p_val, 4),
                "Cohen's d": round(cohens_d, 2),
                'Systematic?': 'Yes*' if p_val < 0.05 else 'No'
            })
    
    return pd.DataFrame(results)

# Run the test
drift_stats = test_systematic_drift(dir_drift_filtered)

print("=" * 90)
print("TEST FOR SYSTEMATIC DRIFT (One-sample t-test: H0 = mean drift is 0)")
print("=" * 90)
print("\nIf p > 0.05: No systematic drift → CONSISTENT with literature")
print("If p < 0.05: Systematic drift in that direction\n")
print(drift_stats.to_string(index=False))
print("\n" + "=" * 90)

TEST FOR SYSTEMATIC DRIFT (One-sample t-test: H0 = mean drift is 0)

If p > 0.05: No systematic drift → CONSISTENT with literature
If p < 0.05: Systematic drift in that direction

  Group               Axis  n  Mean (mm)    SD      95% CI     t      p  Cohen's d Systematic?
    OTC     Medial-Lateral 24      -0.36  3.94 [-1.9, 1.2] -0.44 0.6612      -0.09          No
    OTC Anterior-Posterior 24       4.15 14.40 [-1.6, 9.9]  1.41 0.1718       0.29          No
    OTC  Superior-Inferior 24       0.20  3.81 [-1.3, 1.7]  0.26 0.8009       0.05          No
 nonOTC     Medial-Lateral 28      -0.75  4.36 [-2.4, 0.9] -0.92 0.3680      -0.17          No
 nonOTC Anterior-Posterior 28       1.42  6.71 [-1.1, 3.9]  1.12 0.2721       0.21          No
 nonOTC  Superior-Inferior 28      -0.12  2.71 [-1.1, 0.9] -0.24 0.8121      -0.05          No
Control     Medial-Lateral 42       1.44  6.16 [-0.4, 3.3]  1.52 0.1374       0.23          No
Control Anterior-Posterior 42      -1.57 10.61 [-4.8, 1.6] -

In [12]:
# =============================================================================
# CELL 8: DIRECTIONAL DRIFT - Test by Category
# =============================================================================

def test_drift_by_group_category(df):
    """Test systematic drift for each group × category combination."""
    results = []
    
    for group in ['OTC', 'nonOTC', 'Control']:
        for category in ['face', 'word', 'object', 'house']:
            subset = df[(df['group'] == group) & (df['category'] == category)]
            
            if len(subset) < 3:
                continue
            
            row = {'Group': group, 'Category': category, 'n': len(subset)}
            
            for coord, label in [('delta_x', 'Δx'), ('delta_y', 'Δy'), ('delta_z', 'Δz')]:
                values = subset[coord].dropna()
                if len(values) >= 3:
                    t_stat, p_val = ttest_1samp(values, 0)
                    sig = '*' if p_val < 0.05 else ''
                    row[f'{label} mean'] = f"{values.mean():.1f}{sig}"
                else:
                    row[f'{label} mean'] = 'n/a'
            
            results.append(row)
    
    return pd.DataFrame(results)

category_drift = test_drift_by_group_category(dir_drift_filtered)
print("\nSYSTEMATIC DRIFT BY GROUP × CATEGORY (* = p < 0.05):")
print(category_drift.to_string(index=False))


SYSTEMATIC DRIFT BY GROUP × CATEGORY (* = p < 0.05):
  Group Category  n Δx mean Δy mean Δz mean
    OTC     face  6    -0.3    -2.4     0.8
    OTC     word  6     2.6     7.9    -0.2
    OTC   object  6     0.2     1.7    -0.9
    OTC    house  6    -3.8     9.3    1.0*
 nonOTC     face  7     1.1     1.7     0.2
 nonOTC     word  7    -4.0     2.9    -0.4
 nonOTC   object  7     0.4     2.2    -0.4
 nonOTC    house  7    -0.6    -1.1     0.0
Control     face  7     0.1     0.7     0.1
Control     word  7    -0.1    -2.0    -2.9
Control   object 14     3.0     0.9    -0.6
Control    house 14     1.3    -5.0     1.6


In [13]:
# =============================================================================
# CELL 9: DIRECTIONAL DRIFT - ANOVA: Do Groups Differ in Drift Direction?
# =============================================================================

print("\n" + "=" * 80)
print("ANOVA: Do groups differ in drift DIRECTION?")
print("=" * 80)

for coord, label in [('delta_x', 'Medial-Lateral'), 
                     ('delta_y', 'Anterior-Posterior'), 
                     ('delta_z', 'Superior-Inferior')]:
    
    groups_data = []
    group_names = []
    for g in ['OTC', 'nonOTC', 'Control']:
        vals = dir_drift_filtered[dir_drift_filtered['group'] == g][coord].dropna()
        if len(vals) > 0:
            groups_data.append(vals)
            group_names.append(g)
    
    if len(groups_data) >= 2:
        f_stat, p_val = f_oneway(*groups_data)
        result = '→ GROUPS DIFFER' if p_val < 0.05 else '→ No group differences'
        print(f"\n{label} (Δ{coord[-1]}):")
        print(f"  F = {f_stat:.3f}, p = {p_val:.4f} {result}")
        
        # Post-hoc if significant
        if p_val < 0.05:
            for i, g1 in enumerate(group_names):
                for g2 in group_names[i+1:]:
                    t, p = ttest_ind(
                        dir_drift_filtered[dir_drift_filtered['group'] == g1][coord].dropna(),
                        dir_drift_filtered[dir_drift_filtered['group'] == g2][coord].dropna()
                    )
                    sig = '*' if p < 0.05 else ''
                    print(f"    {g1} vs {g2}: t={t:.2f}, p={p:.4f}{sig}")


ANOVA: Do groups differ in drift DIRECTION?

Medial-Lateral (Δx):
  F = 1.800, p = 0.1711 → No group differences

Anterior-Posterior (Δy):
  F = 2.211, p = 0.1154 → No group differences

Superior-Inferior (Δz):
  F = 0.043, p = 0.9583 → No group differences


In [None]:
# =============================================================================
# CELL 10: VISUALIZATION - Box Plots of Signed Drift
# =============================================================================

fig, axes = plt.subplots(1, 3, figsize=(14, 5))

coord_info = [
    ('delta_x', 'Δx: Medial ← → Lateral'),
    ('delta_y', 'Δy: Posterior ← → Anterior'), 
    ('delta_z', 'Δz: Inferior ← → Superior')
]

group_order = ['OTC', 'nonOTC', 'Control']
palette = {'OTC': 'coral', 'nonOTC': 'skyblue', 'Control': 'lightgreen'}

for ax, (coord, title) in zip(axes, coord_info):
    sns.boxplot(data=dir_drift_filtered, x='group', y=coord, 
                order=group_order, palette=palette, ax=ax)
    
    sns.stripplot(data=dir_drift_filtered, x='group', y=coord,
                  order=group_order, color='black', alpha=0.5, size=4, ax=ax)
    
    # Reference line at 0 (no systematic drift)
    ax.axhline(0, color='red', linestyle='--', linewidth=2, alpha=0.7)
    
    ax.set_xlabel('')
    ax.set_ylabel(f'{title.split(":")[0]} (mm)')
    ax.set_title(title)
    
    # Annotate means
    for i, group in enumerate(group_order):
        grp_data = dir_drift_filtered[dir_drift_filtered['group'] == group][coord]
        if len(grp_data) > 0:
            mean = grp_data.mean()
            sem = grp_data.sem()
            ax.annotate(f'{mean:.1f}±{sem:.1f}', 
                       xy=(i, ax.get_ylim()[1] * 0.85),
                       ha='center', fontsize=9, fontweight='bold')

plt.suptitle('Signed Coordinate Drift by Group\n(Centered on 0 = no systematic directional drift)', 
             fontsize=12, y=1.02)
plt.tight_layout()
plt.savefig('directional_drift_boxplots.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# =============================================================================
# CELL 11: VISUALIZATION - Arrow Plot Showing Drift Directions
# =============================================================================

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

category_colors = {'face': '#FFB347', 'word': '#6495ED', 
                   'object': '#40E0D0', 'house': '#B39EB5'}

for ax, group in zip(axes, ['OTC', 'nonOTC', 'Control']):
    group_data = dir_drift_filtered[dir_drift_filtered['group'] == group]
    
    # Plot arrows from origin (y=delta_y for A-P, z=delta_z for S-I)
    for _, row in group_data.iterrows():
        ax.arrow(0, 0, row['delta_y'], row['delta_z'], 
                head_width=0.8, head_length=0.5, 
                fc=category_colors.get(row['category'], 'gray'),
                ec=category_colors.get(row['category'], 'gray'),
                alpha=0.6, linewidth=1.5)
    
    # Reference circle at 10mm (typical noise level)
    circle = plt.Circle((0, 0), 10, fill=False, linestyle='--', 
                        color='gray', alpha=0.5, linewidth=2)
    ax.add_patch(circle)
    
    ax.axhline(0, color='black', linewidth=0.5, alpha=0.3)
    ax.axvline(0, color='black', linewidth=0.5, alpha=0.3)
    ax.set_xlim(-30, 30)
    ax.set_ylim(-30, 30)
    ax.set_aspect('equal')
    ax.set_xlabel('Δy: Anterior-Posterior (mm)')
    ax.set_ylabel('Δz: Superior-Inferior (mm)')
    ax.set_title(f'{group} (n={len(group_data)})')

# Legend
handles = [plt.Line2D([0], [0], color=c, marker='>', linestyle='', 
                       markersize=10, label=cat.title()) 
           for cat, c in category_colors.items()]
fig.legend(handles=handles, loc='center right', bbox_to_anchor=(1.08, 0.5))

plt.suptitle('Drift Direction (Y-Z plane)\nArrows show T2 - T1 displacement\n'
             '(Random directions = no systematic drift)', fontsize=11, y=1.05)
plt.tight_layout()
plt.savefig('directional_drift_arrows.png', dpi=150, bbox_inches='tight')
plt.show()

print("Interpretation: Random arrow directions → measurement noise (matches literature)")
print("               Clustered arrows → systematic spatial shift")

In [None]:
# =============================================================================
# CELL 12: VISUALIZATION - Signed vs Unsigned Comparison
# =============================================================================

fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Left: Euclidean (unsigned) - original analysis
ax = axes[0]
sns.boxplot(data=dir_drift_filtered, x='group', y='euclidean_dist',
            order=['OTC', 'nonOTC', 'Control'],
            palette=palette, ax=ax)
ax.set_ylabel('Euclidean Distance (mm)')
ax.set_xlabel('')
ax.set_title('Unsigned Drift (Euclidean)\nAlways positive - measures variability')

# Right: Mean signed drift (should be ~0 if no systematic drift)
ax = axes[1]
summary = dir_drift_filtered.groupby('group')[['delta_x', 'delta_y', 'delta_z']].mean()
summary = summary.reindex(['OTC', 'nonOTC', 'Control'])
summary.plot(kind='bar', ax=ax, width=0.8, 
             color=['#e74c3c', '#3498db', '#2ecc71'])
ax.axhline(0, color='black', linewidth=1.5)
ax.set_ylabel('Mean Signed Drift (mm)')
ax.set_xlabel('')
ax.set_title('Mean Signed Drift by Axis\nShould be ~0 if no systematic direction')
ax.set_xticklabels(['OTC', 'nonOTC', 'Control'], rotation=0)
ax.legend(title='Axis', labels=['M-L', 'A-P', 'S-I'])

plt.tight_layout()
plt.savefig('signed_vs_unsigned_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

In [17]:
# =============================================================================
# CELL 13: Summary and Interpretation
# =============================================================================

print("\n" + "=" * 80)
print("SUMMARY: Directional Drift Analysis")
print("=" * 80)

# Summary table
summary = dir_drift_filtered.groupby('group').agg({
    'delta_x': ['mean', 'std'],
    'delta_y': ['mean', 'std'],
    'delta_z': ['mean', 'std'],
    'euclidean_dist': ['mean', 'std', 'count']
}).round(2)

print("\nMean ± SD by Group:")
print(summary)

print("""
================================================================================
INTERPRETATION
================================================================================

1. SIGNED DRIFT (delta_x, delta_y, delta_z):
   - If means ≈ 0 with p > 0.05: Peaks fluctuate randomly around stable location
   - This matches Dehaene-Lambertz (2018): "no linear effect of session" (F < 1)

2. UNSIGNED DRIFT (Euclidean distance):
   - Your original values (5-15mm) reflect VARIABILITY/noise
   - Expected even with perfectly stable peaks due to:
     • Voxel resolution (3mm isotropic)
     • Registration noise (1-3mm)
     • Threshold effects on peak voxel selection

3. KEY FINDING:
   - If signed means ≈ 0 AND p > 0.05: Results MATCH the literature
   - Euclidean distances measure variability, not systematic drift
   - Both measures are consistent - just answering different questions

4. OTC vs CONTROLS:
   - If no group differences in signed drift direction → OTC peaks are as
     spatially stable as Controls (reorganization occurs WITHIN stable locations)
================================================================================
""")

# Save results
dir_drift_filtered.to_csv('directional_drift_results.csv', index=False)
drift_stats.to_csv('directional_drift_statistics.csv', index=False)
print("✓ Saved: directional_drift_results.csv, directional_drift_statistics.csv")


SUMMARY: Directional Drift Analysis

Mean ± SD by Group:
        delta_x       delta_y        delta_z       euclidean_dist             
           mean   std    mean    std    mean   std           mean    std count
group                                                                         
Control    1.44  6.16   -1.57  10.61   -0.10  5.65           8.84  10.34    42
OTC       -0.36  3.94    4.15  14.40    0.20  3.81          10.24  12.10    24
nonOTC    -0.75  4.36    1.42   6.71   -0.12  2.71           5.56   6.48    28

INTERPRETATION

1. SIGNED DRIFT (delta_x, delta_y, delta_z):
   - If means ≈ 0 with p > 0.05: Peaks fluctuate randomly around stable location
   - This matches Dehaene-Lambertz (2018): "no linear effect of session" (F < 1)

2. UNSIGNED DRIFT (Euclidean distance):
   - Your original values (5-15mm) reflect VARIABILITY/noise
   - Expected even with perfectly stable peaks due to:
     • Voxel resolution (3mm isotropic)
     • Registration noise (1-3mm)
     • Thresh

In [18]:
# =============================================================================
# CELL 14: Reorganization Status Analysis (OTC only)
# =============================================================================
# Tests whether "reorganizing" categories (faces in LH, words in RH) show
# different spatial drift than "typical" categories
# =============================================================================

# 1. Add surgery_side to dir_drift_filtered
surgery_map = df.set_index('sub')['SurgerySide'].to_dict()
dir_drift_filtered['surgery_side'] = dir_drift_filtered['subject'].map(surgery_map)

# 2. Create reorganization_status (OTC only, unilateral categories only)
def get_reorg_status(row):
    if row['group'] != 'OTC':
        return 'N/A'
    if row['category'] not in ['face', 'word']:
        return 'N/A'  # bilateral categories excluded for this analysis
    
    # Reorganizing = category normally lateralized to the RESECTED side
    if row['surgery_side'] == 'left' and row['category'] == 'word':
        return 'Reorganizing'
    elif row['surgery_side'] == 'right' and row['category'] == 'face':
        return 'Reorganizing'
    else:
        return 'Typical'

dir_drift_filtered['reorg_status'] = dir_drift_filtered.apply(get_reorg_status, axis=1)

# 3. Descriptive stats
print("=" * 80)
print("REORGANIZATION STATUS: Descriptive Statistics")
print("=" * 80)

otc_reorg = dir_drift_filtered[dir_drift_filtered['reorg_status'] == 'Reorganizing']
otc_typical = dir_drift_filtered[dir_drift_filtered['reorg_status'] == 'Typical']
ctrl_unilateral = dir_drift_filtered[
    (dir_drift_filtered['group'] == 'Control') & 
    (dir_drift_filtered['category'].isin(['face', 'word']))
]

print(f"\nOTC Reorganizing (n={len(otc_reorg)}):")
print(f"  Subjects: {otc_reorg['subject'].unique().tolist()}")
print(f"  Categories: {otc_reorg['category'].value_counts().to_dict()}")
print(f"  Euclidean drift: {otc_reorg['euclidean_dist'].mean():.2f} ± {otc_reorg['euclidean_dist'].std():.2f} mm")

print(f"\nOTC Typical (n={len(otc_typical)}):")
print(f"  Subjects: {otc_typical['subject'].unique().tolist()}")
print(f"  Categories: {otc_typical['category'].value_counts().to_dict()}")
print(f"  Euclidean drift: {otc_typical['euclidean_dist'].mean():.2f} ± {otc_typical['euclidean_dist'].std():.2f} mm")

print(f"\nControl Unilateral (n={len(ctrl_unilateral)}):")
print(f"  Euclidean drift: {ctrl_unilateral['euclidean_dist'].mean():.2f} ± {ctrl_unilateral['euclidean_dist'].std():.2f} mm")

# 4. Statistical comparisons
print("\n" + "=" * 80)
print("STATISTICAL TESTS: Euclidean Drift")
print("=" * 80)

# Reorganizing vs Typical (within OTC)
if len(otc_reorg) >= 2 and len(otc_typical) >= 2:
    t, p = ttest_ind(otc_reorg['euclidean_dist'], otc_typical['euclidean_dist'])
    print(f"\nOTC Reorganizing vs OTC Typical:")
    print(f"  t = {t:.3f}, p = {p:.4f}")

# Reorganizing vs Control
if len(otc_reorg) >= 2 and len(ctrl_unilateral) >= 2:
    t, p = ttest_ind(otc_reorg['euclidean_dist'], ctrl_unilateral['euclidean_dist'])
    print(f"\nOTC Reorganizing vs Control:")
    print(f"  t = {t:.3f}, p = {p:.4f}")

# Typical vs Control
if len(otc_typical) >= 2 and len(ctrl_unilateral) >= 2:
    t, p = ttest_ind(otc_typical['euclidean_dist'], ctrl_unilateral['euclidean_dist'])
    print(f"\nOTC Typical vs Control:")
    print(f"  t = {t:.3f}, p = {p:.4f}")

# 5. Breakdown by specific category
print("\n" + "=" * 80)
print("BREAKDOWN BY CATEGORY")
print("=" * 80)

print("\nOTC Reorganizing:")
for cat in ['face', 'word']:
    subset = otc_reorg[otc_reorg['category'] == cat]
    if len(subset) > 0:
        print(f"  {cat}: {subset['euclidean_dist'].mean():.2f} ± {subset['euclidean_dist'].std():.2f} mm (n={len(subset)})")

print("\nOTC Typical:")
for cat in ['face', 'word']:
    subset = otc_typical[otc_typical['category'] == cat]
    if len(subset) > 0:
        print(f"  {cat}: {subset['euclidean_dist'].mean():.2f} ± {subset['euclidean_dist'].std():.2f} mm (n={len(subset)})")

print("\n" + "=" * 80)

REORGANIZATION STATUS: Descriptive Statistics

OTC Reorganizing (n=6):
  Subjects: ['sub-004', 'sub-008', 'sub-010', 'sub-017', 'sub-021', 'sub-079']
  Categories: {'word': 4, 'face': 2}
  Euclidean drift: 16.76 ± 11.99 mm

OTC Typical (n=6):
  Subjects: ['sub-004', 'sub-008', 'sub-010', 'sub-017', 'sub-021', 'sub-079']
  Categories: {'face': 4, 'word': 2}
  Euclidean drift: 3.53 ± 1.96 mm

Control Unilateral (n=14):
  Euclidean drift: 7.79 ± 10.00 mm

STATISTICAL TESTS: Euclidean Drift

OTC Reorganizing vs OTC Typical:
  t = 2.668, p = 0.0236

OTC Reorganizing vs Control:
  t = 1.736, p = 0.0997

OTC Typical vs Control:
  t = -1.020, p = 0.3210

BREAKDOWN BY CATEGORY

OTC Reorganizing:
  face: 13.70 ± 12.16 mm (n=2)
  word: 18.29 ± 13.45 mm (n=4)

OTC Typical:
  face: 3.06 ± 1.68 mm (n=4)
  word: 4.47 ± 2.85 mm (n=2)



In [19]:
# =============================================================================
# CELL 15: Paired T-Test (Within-Subject Comparison)
# =============================================================================
# Each OTC subject has one "Reorganizing" and one "Typical" unilateral category
# Paired test is more appropriate and increases power
# =============================================================================

# Pivot to get one row per subject with both conditions
otc_unilateral = dir_drift_filtered[
    (dir_drift_filtered['group'] == 'OTC') & 
    (dir_drift_filtered['reorg_status'].isin(['Reorganizing', 'Typical']))
]

paired_data = otc_unilateral.pivot_table(
    index='subject', 
    columns='reorg_status', 
    values='euclidean_dist'
).dropna()

print("=" * 70)
print("PAIRED ANALYSIS: Reorganizing vs Typical (Within-Subject)")
print("=" * 70)

print(f"\n{'Subject':<12} {'Reorganizing':>14} {'Typical':>10} {'Difference':>12}")
print("-" * 50)

for subj in paired_data.index:
    reorg = paired_data.loc[subj, 'Reorganizing']
    typical = paired_data.loc[subj, 'Typical']
    diff = reorg - typical
    print(f"{subj:<12} {reorg:>14.2f} {typical:>10.2f} {diff:>12.2f}")

print("-" * 50)
print(f"{'Mean':<12} {paired_data['Reorganizing'].mean():>14.2f} {paired_data['Typical'].mean():>10.2f} {(paired_data['Reorganizing'] - paired_data['Typical']).mean():>12.2f}")

# Paired t-test
from scipy.stats import ttest_rel

t, p = ttest_rel(paired_data['Reorganizing'], paired_data['Typical'])

# Effect size (Cohen's d for paired data)
diff = paired_data['Reorganizing'] - paired_data['Typical']
cohens_d = diff.mean() / diff.std()

print(f"\n{'=' * 70}")
print("PAIRED T-TEST RESULTS")
print("=" * 70)
print(f"  t({len(paired_data)-1}) = {t:.3f}")
print(f"  p = {p:.4f} {'*' if p < 0.05 else ''}")
print(f"  Cohen's d = {cohens_d:.2f}")
print(f"\n  Mean difference: {diff.mean():.2f} mm (Reorganizing - Typical)")
print(f"  95% CI: [{diff.mean() - 1.96*diff.sem():.2f}, {diff.mean() + 1.96*diff.sem():.2f}]")

# Interpretation
print(f"\n{'=' * 70}")
print("INTERPRETATION")
print("=" * 70)
if p < 0.05:
    print(f"\n  Reorganizing categories show significantly MORE spatial drift")
    print(f"  than typical categories within the same patients.")
    print(f"\n  This suggests that visual regions processing categories in their")
    print(f"  'non-native' hemisphere are less spatially stable over time.")
else:
    print(f"\n  No significant within-subject difference in spatial drift.")
print("=" * 70)

PAIRED ANALYSIS: Reorganizing vs Typical (Within-Subject)

Subject        Reorganizing    Typical   Difference
--------------------------------------------------
sub-004               22.29       6.48        15.81
sub-008                5.10       2.45         2.65
sub-010               15.00       5.10         9.90
sub-017               31.80       2.00        29.80
sub-021                1.00       3.74        -2.74
sub-079               25.38       1.41        23.96
--------------------------------------------------
Mean                  16.76       3.53        13.23

PAIRED T-TEST RESULTS
  t(5) = 2.604
  p = 0.0480 *
  Cohen's d = 1.06

  Mean difference: 13.23 mm (Reorganizing - Typical)
  95% CI: [3.27, 23.19]

INTERPRETATION

  Reorganizing categories show significantly MORE spatial drift
  than typical categories within the same patients.

  This suggests that visual regions processing categories in their
  'non-native' hemisphere are less spatially stable over time.


In [26]:
# =============================================================================
# CELL 16 (UPDATED): Comprehensive Group Comparisons - Corrected
# =============================================================================
# Unilateral: Compare by category (Face vs Face, Word vs Word)
# Bilateral: Compare by matched hemisphere
# =============================================================================

from scipy.stats import ttest_ind, f_oneway

print("=" * 85)
print("PART 1: UNILATERAL CATEGORIES - Compared by Category")
print("=" * 85)

# --- Define groups by category ---
otc_reorg = dir_drift_filtered[dir_drift_filtered['reorg_status'] == 'Reorganizing']
otc_typical = dir_drift_filtered[dir_drift_filtered['reorg_status'] == 'Typical']

otc_reorg_face = otc_reorg[otc_reorg['category'] == 'face']['euclidean_dist']  # LH, n=2
otc_reorg_word = otc_reorg[otc_reorg['category'] == 'word']['euclidean_dist']  # RH, n=4
otc_typical_face = otc_typical[otc_typical['category'] == 'face']['euclidean_dist']  # RH, n=4
otc_typical_word = otc_typical[otc_typical['category'] == 'word']['euclidean_dist']  # LH, n=2

nonotc_uni = dir_drift_filtered[(dir_drift_filtered['group'] == 'nonOTC') & 
                                 (dir_drift_filtered['category'].isin(['face', 'word']))]
nonotc_face = nonotc_uni[nonotc_uni['category'] == 'face']['euclidean_dist']
nonotc_word = nonotc_uni[nonotc_uni['category'] == 'word']['euclidean_dist']

ctrl_uni = dir_drift_filtered[(dir_drift_filtered['group'] == 'Control') & 
                               (dir_drift_filtered['category'].isin(['face', 'word']))]
ctrl_face = ctrl_uni[ctrl_uni['category'] == 'face']['euclidean_dist']  # RH only, n=7
ctrl_word = ctrl_uni[ctrl_uni['category'] == 'word']['euclidean_dist']  # LH only, n=7

# --- Descriptives ---
print("\n--- Descriptive Statistics by Category ---\n")
print(f"{'Group':<20} {'Category':<10} {'Hemi':<8} {'n':>4} {'Mean':>8} {'SD':>8}")
print("-" * 65)
print(f"{'OTC Reorganizing':<20} {'Face':<10} {'LH':<8} {len(otc_reorg_face):>4} {otc_reorg_face.mean():>8.2f} {otc_reorg_face.std():>8.2f}")
print(f"{'OTC Reorganizing':<20} {'Word':<10} {'RH':<8} {len(otc_reorg_word):>4} {otc_reorg_word.mean():>8.2f} {otc_reorg_word.std():>8.2f}")
print(f"{'OTC Typical':<20} {'Face':<10} {'RH':<8} {len(otc_typical_face):>4} {otc_typical_face.mean():>8.2f} {otc_typical_face.std():>8.2f}")
print(f"{'OTC Typical':<20} {'Word':<10} {'LH':<8} {len(otc_typical_word):>4} {otc_typical_word.mean():>8.2f} {otc_typical_word.std():>8.2f}")
print(f"{'nonOTC':<20} {'Face':<10} {'RH':<8} {len(nonotc_face):>4} {nonotc_face.mean():>8.2f} {nonotc_face.std():>8.2f}")
print(f"{'nonOTC':<20} {'Word':<10} {'LH':<8} {len(nonotc_word):>4} {nonotc_word.mean():>8.2f} {nonotc_word.std():>8.2f}")
print(f"{'Control':<20} {'Face':<10} {'RH':<8} {len(ctrl_face):>4} {ctrl_face.mean():>8.2f} {ctrl_face.std():>8.2f}")
print(f"{'Control':<20} {'Word':<10} {'LH':<8} {len(ctrl_word):>4} {ctrl_word.mean():>8.2f} {ctrl_word.std():>8.2f}")

# --- Statistical Comparisons by Category ---
print("\n--- Statistical Comparisons (by category) ---\n")
print(f"{'Comparison':<45} {'n1':>4} {'n2':>4} {'t':>8} {'p':>10} {'Sig':>6}")
print("-" * 80)

# Face comparisons (OTC Reorg in LH vs others in RH)
comparisons_face = [
    ('OTC Reorg Face (LH) vs OTC Typical Face (RH)', otc_reorg_face, otc_typical_face),
    ('OTC Reorg Face (LH) vs nonOTC Face (RH)', otc_reorg_face, nonotc_face),
    ('OTC Reorg Face (LH) vs Control Face (RH)', otc_reorg_face, ctrl_face),
    ('OTC Typical Face (RH) vs Control Face (RH)', otc_typical_face, ctrl_face),
]

for label, d1, d2 in comparisons_face:
    if len(d1) >= 2 and len(d2) >= 2:
        t, p = ttest_ind(d1, d2)
        sig = '*' if p < 0.05 else ''
        print(f"{label:<45} {len(d1):>4} {len(d2):>4} {t:>8.3f} {p:>10.4f} {sig:>6}")

# Word comparisons (OTC Reorg in RH vs others in LH)
comparisons_word = [
    ('OTC Reorg Word (RH) vs OTC Typical Word (LH)', otc_reorg_word, otc_typical_word),
    ('OTC Reorg Word (RH) vs nonOTC Word (LH)', otc_reorg_word, nonotc_word),
    ('OTC Reorg Word (RH) vs Control Word (LH)', otc_reorg_word, ctrl_word),
    ('OTC Typical Word (LH) vs Control Word (LH)', otc_typical_word, ctrl_word),
]

for label, d1, d2 in comparisons_word:
    if len(d1) >= 2 and len(d2) >= 2:
        t, p = ttest_ind(d1, d2)
        sig = '*' if p < 0.05 else ''
        print(f"{label:<45} {len(d1):>4} {len(d2):>4} {t:>8.3f} {p:>10.4f} {sig:>6}")

# ============================================================
# PART 2: BILATERAL CATEGORIES - By Matched Hemisphere
# ============================================================
print("\n" + "=" * 85)
print("PART 2: BILATERAL CATEGORIES - Compared by Matched Hemisphere")
print("=" * 85)

# --- Define groups ---
otc_bil = dir_drift_filtered[(dir_drift_filtered['group'] == 'OTC') & 
                              (dir_drift_filtered['category'].isin(['object', 'house']))]
nonotc_bil = dir_drift_filtered[(dir_drift_filtered['group'] == 'nonOTC') & 
                                 (dir_drift_filtered['category'].isin(['object', 'house']))]
ctrl_bil = dir_drift_filtered[(dir_drift_filtered['group'] == 'Control') & 
                               (dir_drift_filtered['category'].isin(['object', 'house']))]

# Split by hemisphere
otc_bil_L = otc_bil[otc_bil['hemisphere'] == 'l']['euclidean_dist']
otc_bil_R = otc_bil[otc_bil['hemisphere'] == 'r']['euclidean_dist']
nonotc_bil_L = nonotc_bil[nonotc_bil['hemisphere'] == 'l']['euclidean_dist']
nonotc_bil_R = nonotc_bil[nonotc_bil['hemisphere'] == 'r']['euclidean_dist']
ctrl_bil_L = ctrl_bil[ctrl_bil['hemisphere'] == 'l']['euclidean_dist']
ctrl_bil_R = ctrl_bil[ctrl_bil['hemisphere'] == 'r']['euclidean_dist']

# --- Descriptives ---
print("\n--- Descriptive Statistics by Hemisphere ---\n")
print(f"{'Group':<15} {'Hemi':<8} {'n':>4} {'Mean':>8} {'SD':>8}")
print("-" * 50)
print(f"{'OTC':<15} {'LH':<8} {len(otc_bil_L):>4} {otc_bil_L.mean():>8.2f} {otc_bil_L.std():>8.2f}")
print(f"{'OTC':<15} {'RH':<8} {len(otc_bil_R):>4} {otc_bil_R.mean():>8.2f} {otc_bil_R.std():>8.2f}")
print(f"{'nonOTC':<15} {'LH':<8} {len(nonotc_bil_L):>4} {nonotc_bil_L.mean():>8.2f} {nonotc_bil_L.std():>8.2f}")
print(f"{'nonOTC':<15} {'RH':<8} {len(nonotc_bil_R):>4} {nonotc_bil_R.mean():>8.2f} {nonotc_bil_R.std():>8.2f}")
print(f"{'Control':<15} {'LH':<8} {len(ctrl_bil_L):>4} {ctrl_bil_L.mean():>8.2f} {ctrl_bil_L.std():>8.2f}")
print(f"{'Control':<15} {'RH':<8} {len(ctrl_bil_R):>4} {ctrl_bil_R.mean():>8.2f} {ctrl_bil_R.std():>8.2f}")

# --- Statistical Comparisons by Hemisphere ---
print("\n--- Statistical Comparisons (matched hemisphere) ---\n")
print(f"{'Comparison':<40} {'n1':>4} {'n2':>4} {'t':>8} {'p':>10} {'Sig':>6}")
print("-" * 75)

# LH comparisons
comparisons_L = [
    ('OTC LH vs nonOTC LH', otc_bil_L, nonotc_bil_L),
    ('OTC LH vs Control LH', otc_bil_L, ctrl_bil_L),
    ('nonOTC LH vs Control LH', nonotc_bil_L, ctrl_bil_L),
]

for label, d1, d2 in comparisons_L:
    if len(d1) >= 2 and len(d2) >= 2:
        t, p = ttest_ind(d1, d2)
        sig = '*' if p < 0.05 else ''
        print(f"{label:<40} {len(d1):>4} {len(d2):>4} {t:>8.3f} {p:>10.4f} {sig:>6}")

# RH comparisons
comparisons_R = [
    ('OTC RH vs nonOTC RH', otc_bil_R, nonotc_bil_R),
    ('OTC RH vs Control RH', otc_bil_R, ctrl_bil_R),
    ('nonOTC RH vs Control RH', nonotc_bil_R, ctrl_bil_R),
]

for label, d1, d2 in comparisons_R:
    if len(d1) >= 2 and len(d2) >= 2:
        t, p = ttest_ind(d1, d2)
        sig = '*' if p < 0.05 else ''
        print(f"{label:<40} {len(d1):>4} {len(d2):>4} {t:>8.3f} {p:>10.4f} {sig:>6}")

print("\n" + "=" * 85)
print("NOTE: Unilateral comparisons cross hemispheres by design")
print("      (e.g., OTC Reorg Face in LH vs Control Face in RH)")
print("      Bilateral comparisons use matched hemispheres")
print("=" * 85)

PART 1: UNILATERAL CATEGORIES - Compared by Category

--- Descriptive Statistics by Category ---

Group                Category   Hemi        n     Mean       SD
-----------------------------------------------------------------
OTC Reorganizing     Face       LH          2    13.70    12.16
OTC Reorganizing     Word       RH          4    18.29    13.45
OTC Typical          Face       RH          4     3.06     1.68
OTC Typical          Word       LH          2     4.47     2.85
nonOTC               Face       RH          7     2.83     4.98
nonOTC               Word       LH          7    10.78     8.15
Control              Face       RH          7     1.37     0.99
Control              Word       LH          7    14.22    10.92

--- Statistical Comparisons (by category) ---

Comparison                                      n1   n2        t          p    Sig
--------------------------------------------------------------------------------
OTC Reorg Face (LH) vs OTC Typical Face (RH)    

In [27]:
# Bilateral breakdown by category and hemisphere
bil_data = dir_drift_filtered[dir_drift_filtered['category'].isin(['object', 'house'])]

print(f"{'Group':<10} {'Category':<10} {'Hemi':<6} {'n':>4} {'Mean':>8} {'SD':>8}")
print("-" * 50)

for group in ['OTC', 'nonOTC', 'Control']:
    for cat in ['house', 'object']:
        for hemi in ['l', 'r']:
            subset = bil_data[(bil_data['group'] == group) & 
                              (bil_data['category'] == cat) & 
                              (bil_data['hemisphere'] == hemi)]['euclidean_dist']
            if len(subset) > 0:
                print(f"{group:<10} {cat:<10} {hemi.upper():<6} {len(subset):>4} {subset.mean():>8.2f} {subset.std():>8.2f}")

Group      Category   Hemi      n     Mean       SD
--------------------------------------------------
OTC        house      L         2    17.89     6.22
OTC        house      R         4    14.23    23.78
OTC        object     L         2     6.85     3.36
OTC        object     R         4     4.38     2.24
nonOTC     house      L         4     2.88     2.57
nonOTC     house      R         3     4.08     2.24
nonOTC     object     L         4     1.83     0.47
nonOTC     object     R         3     9.80     9.14
Control    house      L         7    12.66    16.24
Control    house      R         7     6.67     7.82
Control    object     L         7    10.34    11.60
Control    object     R         7     7.79     5.08


In [30]:
# =============================================================================
# CELL 18: Age as Covariate Analysis
# =============================================================================
# Test whether drift variability is related to age, particularly for words/houses
# =============================================================================

import statsmodels.api as sm
from scipy.stats import pearsonr, spearmanr

# --- Merge age data ---
age_map = df.set_index('sub')[['age_1', 'age_2']].to_dict('index')

dir_drift_filtered['age_T1'] = dir_drift_filtered['subject'].map(
    lambda x: age_map.get(x, {}).get('age_1', None))
dir_drift_filtered['age_T2'] = dir_drift_filtered['subject'].map(
    lambda x: age_map.get(x, {}).get('age_2', None))

# Calculate mean age
dir_drift_filtered['age_mean'] = (dir_drift_filtered['age_T1'] + dir_drift_filtered['age_T2']) / 2

# ============================================================
# PART 1: Correlations - Age vs Drift by Category
# ============================================================
print("=" * 80)
print("PART 1: Correlations - Age vs Euclidean Drift")
print("=" * 80)

print(f"\n{'Group':<12} {'Category':<10} {'n':>4} {'r':>8} {'p':>10} {'Sig':>6}")
print("-" * 55)

for group in ['OTC', 'nonOTC', 'Control']:
    for cat in ['face', 'word', 'object', 'house']:
        subset = dir_drift_filtered[(dir_drift_filtered['group'] == group) & 
                                     (dir_drift_filtered['category'] == cat)]
        subset = subset.dropna(subset=['age_T1', 'euclidean_dist'])
        
        if len(subset) >= 3:
            r, p = pearsonr(subset['age_T1'], subset['euclidean_dist'])
            sig = '*' if p < 0.05 else ''
            print(f"{group:<12} {cat:<10} {len(subset):>4} {r:>8.3f} {p:>10.4f} {sig:>6}")

# ============================================================
# PART 2: Correlations - Pooled across groups
# ============================================================
print("\n" + "=" * 80)
print("PART 2: Correlations Pooled Across Groups")
print("=" * 80)

print(f"\n{'Category':<10} {'n':>4} {'r':>8} {'p':>10} {'Sig':>6}")
print("-" * 45)

for cat in ['face', 'word', 'object', 'house']:
    subset = dir_drift_filtered[dir_drift_filtered['category'] == cat]
    subset = subset.dropna(subset=['age_T1', 'euclidean_dist'])
    
    if len(subset) >= 3:
        r, p = pearsonr(subset['age_T1'], subset['euclidean_dist'])
        sig = '*' if p < 0.05 else ''
        print(f"{cat:<10} {len(subset):>4} {r:>8.3f} {p:>10.4f} {sig:>6}")

# ============================================================
# PART 3: Regression with Age Covariate (Words only)
# ============================================================
print("\n" + "=" * 80)
print("PART 3: Regression - Word Drift ~ Group + Age")
print("=" * 80)

word_data = dir_drift_filtered[dir_drift_filtered['category'] == 'word'].copy()
word_data = word_data.dropna(subset=['age_T1', 'euclidean_dist'])

# Create dummy variables for group
word_data['is_OTC_reorg'] = ((word_data['group'] == 'OTC') & 
                              (word_data['reorg_status'] == 'Reorganizing')).astype(int)
word_data['is_OTC_typical'] = ((word_data['group'] == 'OTC') & 
                                (word_data['reorg_status'] == 'Typical')).astype(int)
word_data['is_nonOTC'] = (word_data['group'] == 'nonOTC').astype(int)
# Control is reference

# Regression
X = word_data[['is_OTC_reorg', 'is_OTC_typical', 'is_nonOTC', 'age_T1']]
X = sm.add_constant(X)
y = word_data['euclidean_dist']

if len(word_data) > 5:
    model = sm.OLS(y, X).fit()
    print("\nWord Drift ~ Group + Age (Control = reference)")
    print(model.summary().tables[1])

# ============================================================
# PART 4: Regression with Age Covariate (Faces only)
# ============================================================
print("\n" + "=" * 80)
print("PART 4: Regression - Face Drift ~ Group + Age")
print("=" * 80)

face_data = dir_drift_filtered[dir_drift_filtered['category'] == 'face'].copy()
face_data = face_data.dropna(subset=['age_T1', 'euclidean_dist'])

face_data['is_OTC_reorg'] = ((face_data['group'] == 'OTC') & 
                              (face_data['reorg_status'] == 'Reorganizing')).astype(int)
face_data['is_OTC_typical'] = ((face_data['group'] == 'OTC') & 
                                (face_data['reorg_status'] == 'Typical')).astype(int)
face_data['is_nonOTC'] = (face_data['group'] == 'nonOTC').astype(int)

X = face_data[['is_OTC_reorg', 'is_OTC_typical', 'is_nonOTC', 'age_T1']]
X = sm.add_constant(X)
y = face_data['euclidean_dist']

if len(face_data) > 5:
    model = sm.OLS(y, X).fit()
    print("\nFace Drift ~ Group + Age (Control = reference)")
    print(model.summary().tables[1])

# ============================================================
# PART 5: Regression with Age Covariate (House only)
# ============================================================
print("\n" + "=" * 80)
print("PART 5: Regression - House Drift ~ Group + Age")
print("=" * 80)

house_data = dir_drift_filtered[dir_drift_filtered['category'] == 'house'].copy()
house_data = house_data.dropna(subset=['age_T1', 'euclidean_dist'])

house_data['is_OTC'] = (house_data['group'] == 'OTC').astype(int)
house_data['is_nonOTC'] = (house_data['group'] == 'nonOTC').astype(int)

X = house_data[['is_OTC', 'is_nonOTC', 'age_T1']]
X = sm.add_constant(X)
y = house_data['euclidean_dist']

if len(house_data) > 5:
    model = sm.OLS(y, X).fit()
    print("\nHouse Drift ~ Group + Age (Control = reference)")
    print(model.summary().tables[1])

# ============================================================
# PART 6: Regression with Age Covariate (Object only)
# ============================================================
print("\n" + "=" * 80)
print("PART 6: Regression - Object Drift ~ Group + Age")
print("=" * 80)

object_data = dir_drift_filtered[dir_drift_filtered['category'] == 'object'].copy()
object_data = object_data.dropna(subset=['age_T1', 'euclidean_dist'])

object_data['is_OTC'] = (object_data['group'] == 'OTC').astype(int)
object_data['is_nonOTC'] = (object_data['group'] == 'nonOTC').astype(int)

X = object_data[['is_OTC', 'is_nonOTC', 'age_T1']]
X = sm.add_constant(X)
y = object_data['euclidean_dist']

if len(object_data) > 5:
    model = sm.OLS(y, X).fit()
    print("\nObject Drift ~ Group + Age (Control = reference)")
    print(model.summary().tables[1])

print("\n" + "=" * 80)

PART 1: Correlations - Age vs Euclidean Drift

Group        Category      n        r          p    Sig
-------------------------------------------------------
OTC          face          6   -0.943     0.0047      *
OTC          word          6    0.527     0.2830       
OTC          object        6   -0.715     0.1101       
OTC          house         6   -0.458     0.3616       
nonOTC       face          7    0.093     0.8436       
nonOTC       word          7   -0.408     0.3629       
nonOTC       object        7    0.153     0.7434       
nonOTC       house         7    0.857     0.0138      *
Control      face          7    0.150     0.7483       
Control      word          7    0.385     0.3931       
Control      object       14    0.327     0.2539       
Control      house        14    0.287     0.3198       

PART 2: Correlations Pooled Across Groups

Category      n        r          p    Sig
---------------------------------------------
face         20   -0.439     0.0526 

In [31]:
# Directional drift for OTC Reorganizing only
otc_reorg_dir = dir_drift_filtered[dir_drift_filtered['reorg_status'] == 'Reorganizing']

print("=" * 70)
print("OTC REORGANIZING - Signed Drift Values")
print("=" * 70)

for cat in ['face', 'word']:
    subset = otc_reorg_dir[otc_reorg_dir['category'] == cat]
    print(f"\n{cat.upper()} (n={len(subset)}):")
    print(f"  Δx (M-L):  {subset['delta_x'].mean():>6.2f} ± {subset['delta_x'].std():.2f}")
    print(f"  Δy (A-P):  {subset['delta_y'].mean():>6.2f} ± {subset['delta_y'].std():.2f}")
    print(f"  Δz (S-I):  {subset['delta_z'].mean():>6.2f} ± {subset['delta_z'].std():.2f}")
    
    # One-sample t-test if n >= 3
    if len(subset) >= 3:
        for coord, label in [('delta_x', 'Δx'), ('delta_y', 'Δy'), ('delta_z', 'Δz')]:
            t, p = ttest_1samp(subset[coord], 0)
            sig = '*' if p < 0.05 else ''
            print(f"    {label}: t={t:.2f}, p={p:.4f} {sig}")
    else:
        print("    (n too small for t-test)")
    
    # Show individual values
    print(f"\n  Individual values:")
    for _, row in subset.iterrows():
        print(f"    {row['subject']}: Δx={row['delta_x']:.1f}, Δy={row['delta_y']:.1f}, Δz={row['delta_z']:.1f}")

print("\n" + "=" * 70)

OTC REORGANIZING - Signed Drift Values

FACE (n=2):
  Δx (M-L):   -2.00 ± 2.83
  Δy (A-P):   -7.50 ± 17.68
  Δz (S-I):    5.00 ± 5.66
    (n too small for t-test)

  Individual values:
    sub-004: Δx=-4.0, Δy=-20.0, Δz=9.0
    sub-008: Δx=0.0, Δy=5.0, Δz=1.0

WORD (n=4):
  Δx (M-L):    2.58 ± 5.83
  Δy (A-P):   11.14 ± 19.32
  Δz (S-I):    0.75 ± 6.70
    Δx: t=0.88, p=0.4415 
    Δy: t=1.15, p=0.3324 
    Δz: t=0.22, p=0.8368 

  Individual values:
    sub-010: Δx=-5.0, Δy=-10.0, Δz=10.0
    sub-017: Δx=7.0, Δy=31.0, Δz=-1.0
    sub-021: Δx=1.0, Δy=0.0, Δz=0.0
    sub-079: Δx=7.3, Δy=23.6, Δz=-6.0



In [32]:
# =============================================================================
# CELL 19: Crawford Single-Case Statistics
# =============================================================================
# Compare each OTC Reorganizing patient's drift against control distribution
# =============================================================================

def crawford_t_test(patient_score, control_scores):
    """
    Crawford's modified t-test for single-case comparison.
    Returns t-statistic, two-tailed p-value, and effect size (z-cc).
    """
    n = len(control_scores)
    ctrl_mean = control_scores.mean()
    ctrl_std = control_scores.std(ddof=1)
    
    # Crawford's t
    t_stat = (patient_score - ctrl_mean) / (ctrl_std * np.sqrt((n + 1) / n))
    
    # Two-tailed p-value (t distribution with n-1 df)
    from scipy.stats import t
    p_val = 2 * (1 - t.cdf(abs(t_stat), df=n-1))
    
    # Effect size (z-cc)
    z_cc = (patient_score - ctrl_mean) / ctrl_std
    
    return t_stat, p_val, z_cc

# Get control data for comparison
ctrl_face = dir_drift_filtered[(dir_drift_filtered['group'] == 'Control') & 
                                (dir_drift_filtered['category'] == 'face')]
ctrl_word = dir_drift_filtered[(dir_drift_filtered['group'] == 'Control') & 
                                (dir_drift_filtered['category'] == 'word')]

# OTC Reorganizing data
otc_reorg = dir_drift_filtered[dir_drift_filtered['reorg_status'] == 'Reorganizing']

print("=" * 80)
print("CRAWFORD SINGLE-CASE TESTS: OTC Reorganizing vs Controls")
print("=" * 80)

# --- FACES (OTC Reorg in LH vs Control in RH) ---
print("\n--- FACES ---")
print(f"Control Face (RH): Mean = {ctrl_face['euclidean_dist'].mean():.2f}, SD = {ctrl_face['euclidean_dist'].std():.2f}, n = {len(ctrl_face)}")
print()

otc_reorg_face = otc_reorg[otc_reorg['category'] == 'face']
print(f"{'Subject':<12} {'Drift (mm)':>12} {'Crawford t':>12} {'p':>10} {'z-cc':>10} {'Sig':>6}")
print("-" * 65)

for _, row in otc_reorg_face.iterrows():
    t_stat, p_val, z_cc = crawford_t_test(row['euclidean_dist'], ctrl_face['euclidean_dist'])
    sig = '*' if p_val < 0.05 else ''
    print(f"{row['subject']:<12} {row['euclidean_dist']:>12.2f} {t_stat:>12.2f} {p_val:>10.4f} {z_cc:>10.2f} {sig:>6}")

# --- WORDS (OTC Reorg in RH vs Control in LH) ---
print("\n--- WORDS ---")
print(f"Control Word (LH): Mean = {ctrl_word['euclidean_dist'].mean():.2f}, SD = {ctrl_word['euclidean_dist'].std():.2f}, n = {len(ctrl_word)}")
print()

otc_reorg_word = otc_reorg[otc_reorg['category'] == 'word']
print(f"{'Subject':<12} {'Drift (mm)':>12} {'Crawford t':>12} {'p':>10} {'z-cc':>10} {'Sig':>6}")
print("-" * 65)

for _, row in otc_reorg_word.iterrows():
    t_stat, p_val, z_cc = crawford_t_test(row['euclidean_dist'], ctrl_word['euclidean_dist'])
    sig = '*' if p_val < 0.05 else ''
    print(f"{row['subject']:<12} {row['euclidean_dist']:>12.2f} {t_stat:>12.2f} {p_val:>10.4f} {z_cc:>10.2f} {sig:>6}")

# --- DIRECTIONALITY: Test each signed coordinate ---
print("\n" + "=" * 80)
print("CRAWFORD TESTS: Directional Drift (Signed Coordinates)")
print("=" * 80)

for cat, ctrl_data, otc_data in [('face', ctrl_face, otc_reorg_face), 
                                   ('word', ctrl_word, otc_reorg_word)]:
    print(f"\n--- {cat.upper()} ---")
    
    for coord, label in [('delta_x', 'Δx (M-L)'), ('delta_y', 'Δy (A-P)'), ('delta_z', 'Δz (S-I)')]:
        ctrl_vals = ctrl_data[coord].dropna()
        print(f"\n{label}: Control Mean = {ctrl_vals.mean():.2f}, SD = {ctrl_vals.std():.2f}")
        print(f"{'Subject':<12} {'Value':>10} {'Crawford t':>12} {'p':>10} {'Sig':>6}")
        print("-" * 50)
        
        for _, row in otc_data.iterrows():
            if pd.notna(row[coord]):
                t_stat, p_val, z_cc = crawford_t_test(row[coord], ctrl_vals)
                sig = '*' if p_val < 0.05 else ''
                print(f"{row['subject']:<12} {row[coord]:>10.2f} {t_stat:>12.2f} {p_val:>10.4f} {sig:>6}")

print("\n" + "=" * 80)


CRAWFORD SINGLE-CASE TESTS: OTC Reorganizing vs Controls

--- FACES ---
Control Face (RH): Mean = 1.37, SD = 0.99, n = 7

Subject        Drift (mm)   Crawford t          p       z-cc    Sig
-----------------------------------------------------------------
sub-004             22.29        19.74     0.0000      21.10      *
sub-008              5.10         3.52     0.0125       3.76      *

--- WORDS ---
Control Word (LH): Mean = 14.22, SD = 10.92, n = 7

Subject        Drift (mm)   Crawford t          p       z-cc    Sig
-----------------------------------------------------------------
sub-010             15.00         0.07     0.9488       0.07       
sub-017             31.80         1.51     0.1829       1.61       
sub-021              1.00        -1.13     0.3008      -1.21       
sub-079             25.38         0.96     0.3762       1.02       

CRAWFORD TESTS: Directional Drift (Signed Coordinates)

--- FACE ---

Δx (M-L): Control Mean = 0.14, SD = 0.69
Subject           Value

In [33]:
# =============================================================================
# CELL 20: Compare Word Peak Stability - Cope 12 vs Cope 13
# =============================================================================
# Test if Word > Scramble (cope 12) gives more stable peaks than Word > Face (cope 13)
# =============================================================================

def extract_peak_locations_word_cope12(subject_id):
    """Extract word peak using cope 12 (Word > Scramble) instead of cope 13"""
    
    info = ANALYSIS_SUBJECTS[subject_id]
    roi_dir = BASE_DIR / subject_id / f'ses-{info["sessions"][0]}' / 'ROIs'
    if not roi_dir.exists(): 
        return {}
    
    all_results = {}
    first_session = info['sessions'][0]
    cope_num = 12  # Word > Scramble
    multiplier = 1  # No flip needed

    for hemi in ['l', 'r']:
        mask_file = roi_dir / f'{hemi}_word_searchmask.nii.gz'
        if not mask_file.exists(): 
            continue
        
        try:
            search_mask_img = nib.load(mask_file)
            search_mask = search_mask_img.get_fdata() > 0
            affine = search_mask_img.affine
        except: 
            continue
        
        hemi_key = f'{hemi}_word'
        all_results[hemi_key] = {}
        
        for session in info['sessions']:
            feat_dir = BASE_DIR / subject_id / f'ses-{session}' / 'derivatives' / 'fsl' / 'loc' / 'HighLevel.gfeat'
            z_name = 'zstat1.nii.gz' if session == first_session else f'zstat1_ses{first_session}.nii.gz'
            cope_file = feat_dir / f'cope{cope_num}.feat' / 'stats' / z_name
            
            if not cope_file.exists(): 
                continue
            
            try:
                z_data = nib.load(cope_file).get_fdata() * multiplier
                z_masked = np.where(search_mask & (z_data > 0), z_data, -np.inf)
                peak_idx = np.unravel_index(np.argmax(z_masked), z_masked.shape)
                peak_z = z_data[peak_idx]
                
                if peak_z <= 0:
                    continue
                
                peak_mni = nib.affines.apply_affine(affine, peak_idx)
                
                all_results[hemi_key][session] = {
                    'peak_idx': peak_idx,
                    'peak_mni': peak_mni,
                    'peak_z': peak_z
                }
            except Exception as e:
                print(f"Error {subject_id} {hemi_key} ses-{session}: {e}")
                
    return all_results


# Extract word peaks with cope 12
print("Extracting word peaks with cope 12 (Word > Scramble)...")
peak_locations_cope12 = {}
for sub in ANALYSIS_SUBJECTS:
    if sub not in EXCLUDE_SUBS:
        res = extract_peak_locations_word_cope12(sub)
        if res: 
            peak_locations_cope12[sub] = res

# Compute drift for cope 12 words
def compute_word_drift_cope12(peak_results, subjects_dict):
    results = []
    
    for sid, rois in peak_results.items():
        info = subjects_dict.get(sid, {})
        
        for roi_key, sessions_data in rois.items():
            sessions = sorted(sessions_data.keys())
            if len(sessions) < 2:
                continue
            
            peak_t1 = np.array(sessions_data[sessions[0]]['peak_mni'])
            peak_t2 = np.array(sessions_data[sessions[-1]]['peak_mni'])
            drift_mm = np.linalg.norm(peak_t2 - peak_t1)
            
            hemi = roi_key.split('_')[0]
            
            results.append({
                'subject': sid,
                'group': info.get('group', 'control'),
                'hemi': hemi,
                'intact_hemi': info.get('hemi', 'unknown'),
                'euclidean_dist_cope12': drift_mm,
                't1_peak_z': sessions_data[sessions[0]]['peak_z'],
            })
    
    return pd.DataFrame(results)

drift_cope12 = compute_word_drift_cope12(peak_locations_cope12, ANALYSIS_SUBJECTS)

# Filter by appropriate hemisphere (word = LH for controls, intact for patients)
drift_cope12_filtered = drift_cope12[
    ((drift_cope12['group'].isin(['OTC', 'nonOTC'])) & (drift_cope12['hemi'] == drift_cope12['intact_hemi'])) |
    ((drift_cope12['group'] == 'control') & (drift_cope12['hemi'] == 'l'))
]

# Compare to cope 13 (current)
print("\n" + "=" * 80)
print("COMPARISON: Word Peak Stability by Contrast")
print("=" * 80)

# Get current cope 13 word data
cope13_word = dir_drift_filtered[dir_drift_filtered['category'] == 'word']

print("\n--- COPE 13 (Word > Face) ---")
print(f"{'Group':<12} {'n':>4} {'Mean (mm)':>12} {'SD':>10}")
print("-" * 45)
for group in ['OTC', 'nonOTC', 'Control']:
    if group == 'Control':
        subset = cope13_word[cope13_word['group'] == 'Control']
    else:
        subset = cope13_word[cope13_word['group'] == group]
    if len(subset) > 0:
        print(f"{group:<12} {len(subset):>4} {subset['euclidean_dist'].mean():>12.2f} {subset['euclidean_dist'].std():>10.2f}")

print("\n--- COPE 12 (Word > Scramble) ---")
print(f"{'Group':<12} {'n':>4} {'Mean (mm)':>12} {'SD':>10}")
print("-" * 45)
for group in ['OTC', 'nonOTC', 'control']:
    subset = drift_cope12_filtered[drift_cope12_filtered['group'] == group]
    if len(subset) > 0:
        group_label = 'Control' if group == 'control' else group
        print(f"{group_label:<12} {len(subset):>4} {subset['euclidean_dist_cope12'].mean():>12.2f} {subset['euclidean_dist_cope12'].std():>10.2f}")

# Direct comparison for controls
print("\n--- CONTROL WORD: Cope 12 vs Cope 13 ---")
ctrl_cope13 = cope13_word[cope13_word['group'] == 'Control']['euclidean_dist']
ctrl_cope12 = drift_cope12_filtered[drift_cope12_filtered['group'] == 'control']['euclidean_dist_cope12']

print(f"Cope 13 (Word > Face):     {ctrl_cope13.mean():.2f} ± {ctrl_cope13.std():.2f} mm (n={len(ctrl_cope13)})")
print(f"Cope 12 (Word > Scramble): {ctrl_cope12.mean():.2f} ± {ctrl_cope12.std():.2f} mm (n={len(ctrl_cope12)})")

if len(ctrl_cope13) > 2 and len(ctrl_cope12) > 2:
    t, p = ttest_ind(ctrl_cope13, ctrl_cope12)
    print(f"\nt-test: t = {t:.3f}, p = {p:.4f}")

print("\n" + "=" * 80)

Extracting word peaks with cope 12 (Word > Scramble)...

COMPARISON: Word Peak Stability by Contrast

--- COPE 13 (Word > Face) ---
Group           n    Mean (mm)         SD
---------------------------------------------
OTC             6        13.68      12.69
nonOTC          7        10.78       8.15
Control         7        14.22      10.92

--- COPE 12 (Word > Scramble) ---
Group           n    Mean (mm)         SD
---------------------------------------------
OTC             6        22.60      18.04
nonOTC          7         6.03       8.76
Control         7         6.20       6.01

--- CONTROL WORD: Cope 12 vs Cope 13 ---
Cope 13 (Word > Face):     14.22 ± 10.92 mm (n=7)
Cope 12 (Word > Scramble): 6.20 ± 6.01 mm (n=7)

t-test: t = 1.701, p = 0.1147

