In [5]:
# =============================================================================
# CELL 1: Setup and Configuration
# =============================================================================
import pandas as pd
from pathlib import Path
import nibabel as nib
import numpy as np
from scipy.ndimage import label, center_of_mass
from scipy.stats import pearsonr, ttest_ind, f_oneway, ttest_1samp
import matplotlib.pyplot as plt
import seaborn as sns

# Paths
# CSV_FILE: subject-info table. Columns read later in this notebook:
#   'sub', 'patient', 'group', 'intact_hemi', 'SurgerySide'.
# BASE_DIR: root directory; each subject lives in BASE_DIR/<sub>/ses-<n>/...
# NOTE(review): both are absolute, machine-specific paths — the notebook only
# runs where this layout exists.
CSV_FILE = Path('/user_data/csimmon2/git_repos/long_pt/long_pt_sub_info.csv')
BASE_DIR = Path("/user_data/csimmon2/long_pt")

# Session adjustments
# Maps subject id -> first session number to use. Sessions with a lower number
# are dropped when subjects are loaded; subjects not listed start at session 1.
SESSION_START = {'sub-010': 2, 'sub-018': 2, 'sub-068': 2}

# Contrast definitions
# category -> (cope number, sign multiplier). The multiplier is applied to the
# z-map before the peak search, so -1 ('word') searches the reversed contrast.
COPE_MAP_DIFFERENTIAL = {
    'face': (10, 1),
    'word': (13, -1),
    'object': (3, 1),
    'house': (11, 1)
}

# Exclusions
# Subjects skipped during peak extraction and in the drift computations.
EXCLUDE_SUBS = ['sub-025', 'sub-027', 'sub-045', 'sub-072']

# Load subject info
# NOTE(review): several later functions take a parameter that is also called
# `df`; inside those functions the parameter shadows this global table.
df = pd.read_csv(CSV_FILE)

print("✓ Cell 1 complete")

✓ Cell 1 complete


In [6]:
# =============================================================================
# CELL 2: Load Subjects by Group
# =============================================================================
def load_subjects_by_group(group_filter=None, patient_only=True):
    """Build a per-subject metadata dictionary from the subject-info table.

    Reads the module-level ``df`` table and the directory tree under
    ``BASE_DIR``; nothing is written.

    Parameters
    ----------
    group_filter : str or list of str, optional
        Keep only rows whose 'group' value is in this collection. A single
        string is treated as a one-element list. Falsy means no filtering.
    patient_only : bool or None
        True keeps rows with patient == 1, False keeps rows with patient == 0,
        any other value (e.g. None) keeps both.

    Returns
    -------
    dict
        subject id -> dict with keys 'code', 'sessions' (list of session
        labels as strings, ascending numeric order), 'hemi' ('l' or 'r'),
        'group', 'patient_status' and 'surgery_side'. Subjects without a
        directory under BASE_DIR, or without any usable session, are omitted.
    """
    filtered_df = df.copy()

    if patient_only is True:
        filtered_df = filtered_df[filtered_df['patient'] == 1]
    elif patient_only is False:
        filtered_df = filtered_df[filtered_df['patient'] == 0]

    if group_filter:
        if isinstance(group_filter, str):
            group_filter = [group_filter]
        filtered_df = filtered_df[filtered_df['group'].isin(group_filter)]

    subjects = {}
    for _, row in filtered_df.iterrows():
        subject_id = row['sub']
        subj_dir = BASE_DIR / subject_id
        if not subj_dir.exists():
            continue

        # Only ses-<number> directories count as sessions. A stray directory
        # such as 'ses-01_old' is ignored instead of aborting the whole load
        # with a ValueError from int().
        session_labels = [
            d.name.replace('ses-', '')
            for d in subj_dir.glob('ses-*')
            if d.is_dir()
        ]
        sessions = sorted((s for s in session_labels if s.isdecimal()), key=int)

        start_session = SESSION_START.get(subject_id, 1)
        sessions = [s for s in sessions if int(s) >= start_session]
        if not sessions:
            continue

        # Any value other than the string 'left' (including a missing value)
        # maps to 'r'.
        hemisphere = 'l' if row.get('intact_hemi', 'left') == 'left' else 'r'

        subjects[subject_id] = {
            'code': f"{row['group']}{subject_id.split('-')[1]}",
            'sessions': sessions,
            'hemi': hemisphere,
            'group': row['group'],
            'patient_status': 'patient' if row['patient'] == 1 else 'control',
            'surgery_side': row.get('SurgerySide', None)
        }
    return subjects

# Load patients and controls separately, then merge into one lookup table.
ALL_PATIENTS = load_subjects_by_group(patient_only=True)
ALL_CONTROLS = load_subjects_by_group(patient_only=False)
ANALYSIS_SUBJECTS = dict(ALL_PATIENTS)
ANALYSIS_SUBJECTS.update(ALL_CONTROLS)

# Report the total and the per-group head count.
print(f"✓ Loaded {len(ANALYSIS_SUBJECTS)} subjects")
for g in ['OTC', 'nonOTC', 'control']:
    n = len([v for v in ANALYSIS_SUBJECTS.values() if v['group'] == g])
    print(f"  {g}: {n}")

✓ Loaded 25 subjects
  OTC: 7
  nonOTC: 9
  control: 9


In [7]:
# =============================================================================
# CELL 3: SPATIAL DRIFT - Peak-Based Extraction (Golarai Method)
# =============================================================================
# Following Golarai et al. (2015): Track peak voxel location across sessions
# Drift = Euclidean distance (mm) between the peak coordinates of the first (T1) and last (T2) available session
# =============================================================================

def extract_peak_locations(subject_id, cope_map):
    """Find the peak z-statistic voxel inside each search mask, per session.

    Parameters
    ----------
    subject_id : str
        Key into the module-level ANALYSIS_SUBJECTS dictionary.
    cope_map : dict
        category -> (cope number, sign multiplier). The z-map is multiplied by
        the multiplier before the search, so -1 searches the reversed contrast.

    Returns
    -------
    dict
        '<hemi>_<category>' -> {session -> {'peak_idx', 'peak_mni', 'peak_z'}}.
        An ROI key is present (possibly with an empty dict) whenever its search
        mask could be loaded. Returns {} when the first session has no ROIs
        directory. A session is omitted when its z-map is missing, cannot be
        read, or has no positive voxel inside the mask.

    Notes
    -----
    Search masks are always read from the first session's ROIs directory. The
    first session reads 'zstat1.nii.gz'; later sessions read
    'zstat1_ses<first>.nii.gz' — presumably the map registered to the first
    session's space (TODO confirm). Peak coordinates are obtained with the
    search mask's affine, so mask and z-map are assumed to share one voxel
    grid. The key is called 'peak_mni', but whether these world coordinates are
    actually MNI depends on the mask's affine (TODO confirm).
    """
    info = ANALYSIS_SUBJECTS[subject_id]
    first_session = info['sessions'][0]
    roi_dir = BASE_DIR / subject_id / f'ses-{first_session}' / 'ROIs'
    if not roi_dir.exists():
        return {}

    all_results = {}

    for hemi in ['l', 'r']:
        for category, (cope_num, multiplier) in cope_map.items():

            mask_file = roi_dir / f'{hemi}_{category}_searchmask.nii.gz'
            if not mask_file.exists():
                continue

            hemi_key = f'{hemi}_{category}'

            try:
                search_mask_img = nib.load(mask_file)
                search_mask = search_mask_img.get_fdata() > 0
                affine = search_mask_img.affine
            except Exception as e:
                # Still best-effort (skip this ROI), but no longer silent.
                print(f"Error {subject_id} {hemi_key} loading search mask: {e}")
                continue

            all_results[hemi_key] = {}

            for session in info['sessions']:
                feat_dir = BASE_DIR / subject_id / f'ses-{session}' / 'derivatives' / 'fsl' / 'loc' / 'HighLevel.gfeat'
                z_name = 'zstat1.nii.gz' if session == first_session else f'zstat1_ses{first_session}.nii.gz'
                z_file = feat_dir / f'cope{cope_num}.feat' / 'stats' / z_name

                if not z_file.exists():
                    continue

                try:
                    z_data = nib.load(z_file).get_fdata() * multiplier

                    # A grid mismatch would make the voxel index and affine meaningless.
                    if z_data.shape != search_mask.shape:
                        print(f"Error {subject_id} {hemi_key} ses-{session}: "
                              f"z-map shape {z_data.shape} != mask shape {search_mask.shape}")
                        continue

                    candidates = search_mask & (z_data > 0)
                    if not candidates.any():
                        # Without this guard argmax over an all -inf array
                        # returns voxel 0, which could be recorded as a peak
                        # if that out-of-mask voxel happened to be positive.
                        continue

                    z_masked = np.where(candidates, z_data, -np.inf)
                    peak_idx = np.unravel_index(np.argmax(z_masked), z_masked.shape)
                    peak_z = z_data[peak_idx]
                    peak_mni = nib.affines.apply_affine(affine, peak_idx)

                    all_results[hemi_key][session] = {
                        'peak_idx': peak_idx,
                        'peak_mni': peak_mni,
                        'peak_z': peak_z
                    }
                except Exception as e:
                    print(f"Error {subject_id} {hemi_key} ses-{session}: {e}")

    return all_results


def compute_peak_drift(peak_results, subjects_dict):
    """Compute the first-to-last-session peak displacement (mm) for every ROI.

    Parameters
    ----------
    peak_results : dict
        subject id -> {'<hemi>_<category>' -> {session -> {'peak_mni', 'peak_z', ...}}}.
        Session keys must be convertible with int().
    subjects_dict : dict
        subject id -> metadata dict; 'code' and 'group' are read when present.

    Returns
    -------
    pandas.DataFrame
        One row per ROI that has at least two sessions, with columns
        subject, code, group, hemi, category, category_type, peak_drift_mm,
        t1_peak_z, t2_peak_z, flag. The columns are present even when there
        are no rows. Subjects listed in EXCLUDE_SUBS are skipped.
    """
    columns = ['subject', 'code', 'group', 'hemi', 'category', 'category_type',
               'peak_drift_mm', 't1_peak_z', 't2_peak_z', 'flag']
    results = []

    for sid, rois in peak_results.items():
        if sid in EXCLUDE_SUBS:
            continue
        # Unknown subjects fall back to defaults instead of raising KeyError.
        info = subjects_dict.get(sid, {})

        for roi_key, sessions_data in rois.items():
            # Session labels are strings: sort numerically, otherwise '10'
            # sorts before '2' and the first/last sessions are swapped.
            sessions = sorted(sessions_data.keys(), key=int)
            if len(sessions) < 2:
                continue

            first, last = sessions_data[sessions[0]], sessions_data[sessions[-1]]
            peak_t1 = np.array(first['peak_mni'])
            peak_t2 = np.array(last['peak_mni'])
            drift_mm = np.linalg.norm(peak_t2 - peak_t1)

            t1_z = first['peak_z']
            hemi = roi_key.split('_')[0]
            category = roi_key.split('_')[1]

            results.append({
                'subject': sid,
                'code': info.get('code', sid),
                'group': info.get('group', 'unknown'),
                'hemi': hemi,
                'category': category,
                'category_type': 'Bilateral' if category in ['object', 'house'] else 'Unilateral',
                'peak_drift_mm': drift_mm,
                't1_peak_z': t1_z,
                't2_peak_z': last['peak_z'],
                # Only the first-session peak is checked against the 2.3 cut-off.
                'flag': 'WEAK_SIGNAL' if t1_z < 2.3 else ''
            })

    return pd.DataFrame(results, columns=columns)


# Extract peaks for every non-excluded subject; keep only non-empty results.
print("Extracting Peak Locations (Golarai method)...")
peak_locations = {}
for sub in ANALYSIS_SUBJECTS:
    if sub in EXCLUDE_SUBS:
        continue
    res = extract_peak_locations(sub, COPE_MAP_DIFFERENTIAL)
    if res:
        peak_locations[sub] = res

print("Computing Peak-Based Drift...")
drift_peak = compute_peak_drift(peak_locations, ANALYSIS_SUBJECTS)

# Attach each subject's intact hemisphere so later cells can filter on it.
drift_peak['intact_hemi'] = [ANALYSIS_SUBJECTS[s]['hemi'] for s in drift_peak['subject']]

n_subjects = drift_peak['subject'].nunique()
n_measurements = len(drift_peak)
print(f"\n✓ {n_subjects} subjects, {n_measurements} measurements")
print(f"  Excluded: {EXCLUDE_SUBS}")

# Persist the table next to the notebook.
drift_peak.to_csv('drift_peak_golarai.csv', index=False)
print("✓ Saved to drift_peak_golarai.csv")

Extracting Peak Locations (Golarai method)...
Computing Peak-Based Drift...

✓ 20 subjects, 108 measurements
  Excluded: ['sub-025', 'sub-027', 'sub-045', 'sub-072']
✓ Saved to drift_peak_golarai.csv


In [8]:
# =============================================================================
# CELL 4: SPATIAL DRIFT - Descriptives and Statistical Tests (Euclidean)
# =============================================================================
# Hemisphere selection:
#   OTC/nonOTC: intact hemisphere only
#   Controls: face=R, word=L, object/house=both
# =============================================================================

df_drift = drift_peak.copy()


def _describe(vals):
    """Format a Series as 'mean ± sd [n=count]' with one decimal."""
    return f"{vals.mean():.1f} ± {vals.std():.1f} [n={len(vals)}]"


# Filter by appropriate hemisphere
# Patients: only ROIs located in the intact hemisphere.
otc = df_drift[(df_drift['group'] == 'OTC') & (df_drift['hemi'] == df_drift['intact_hemi'])]
nonotc = df_drift[(df_drift['group'] == 'nonOTC') & (df_drift['hemi'] == df_drift['intact_hemi'])]
ctrl = df_drift[df_drift['group'] == 'control']

# Control subsets by category
# face = right hemisphere, word = left hemisphere, object/house = both.
# NOTE(review): object/house therefore contribute two rows per control subject,
# which the tests below treat as independent observations.
ctrl_face = ctrl[(ctrl['category'] == 'face') & (ctrl['hemi'] == 'r')]
ctrl_word = ctrl[(ctrl['category'] == 'word') & (ctrl['hemi'] == 'l')]
ctrl_object = ctrl[ctrl['category'] == 'object']
ctrl_house = ctrl[ctrl['category'] == 'house']

ctrl_by_cat = {'face': ctrl_face, 'word': ctrl_word, 'object': ctrl_object, 'house': ctrl_house}

# ============================================================
# DESCRIPTIVES BY CATEGORY
# ============================================================
print("="*85)
print("SPATIAL DRIFT BY CATEGORY (Euclidean Distance)")
print("="*85)
print(f"\n{'Category':<10} {'Type':<8} {'OTC-intact':<22} {'nonOTC-intact':<22} {'Control':<22}")
print("-"*85)

for cat in ['face', 'word', 'object', 'house']:
    otc_vals = otc[otc['category'] == cat]['peak_drift_mm']
    nonotc_vals = nonotc[nonotc['category'] == cat]['peak_drift_mm']
    ctrl_vals = ctrl_by_cat[cat]['peak_drift_mm']

    cat_type = "Uni" if cat in ['face', 'word'] else "Bil"

    otc_str = _describe(otc_vals)
    nonotc_str = _describe(nonotc_vals)
    ctrl_str = _describe(ctrl_vals)

    print(f"{cat:<10} {cat_type:<8} {otc_str:<22} {nonotc_str:<22} {ctrl_str:<22}")

print("-"*85)

# ============================================================
# DESCRIPTIVES BY CATEGORY TYPE
# ============================================================
print("\n" + "="*85)
print("SPATIAL DRIFT BY CATEGORY TYPE")
print("="*85)

otc_uni = otc[otc['category'].isin(['face', 'word'])]['peak_drift_mm']
otc_bil = otc[otc['category'].isin(['object', 'house'])]['peak_drift_mm']
nonotc_uni = nonotc[nonotc['category'].isin(['face', 'word'])]['peak_drift_mm']
nonotc_bil = nonotc[nonotc['category'].isin(['object', 'house'])]['peak_drift_mm']
ctrl_uni = pd.concat([ctrl_face['peak_drift_mm'], ctrl_word['peak_drift_mm']])
ctrl_bil = pd.concat([ctrl_object['peak_drift_mm'], ctrl_house['peak_drift_mm']])

print(f"\n{'Type':<12} {'OTC-intact':<22} {'nonOTC-intact':<22} {'Control':<22}")
print("-"*85)
print(f"{'Unilateral':<12} {_describe(otc_uni)}{'':5} {_describe(nonotc_uni)}{'':5} {_describe(ctrl_uni)}")
print(f"{'Bilateral':<12} {_describe(otc_bil)}{'':5} {_describe(nonotc_bil)}{'':5} {_describe(ctrl_bil)}")
print("-"*85)

# ============================================================
# STATISTICAL TESTS
# ============================================================
# Welch's t-test (equal_var=False): the groups are small, of unequal size and
# have very different spreads, so the pooled-variance default of ttest_ind is
# not appropriate here.
# NOTE(review): four tests are run without multiple-comparison correction.
print("\n" + "="*85)
print("STATISTICAL TESTS (OTC vs Control, Welch's t-test)")
print("="*85)

for cat in ['face', 'word', 'object', 'house']:
    otc_vals = otc[otc['category'] == cat]['peak_drift_mm']
    ctrl_vals = ctrl_by_cat[cat]['peak_drift_mm']
    cat_type = "Uni" if cat in ['face', 'word'] else "Bil"

    if len(otc_vals) > 1 and len(ctrl_vals) > 1:
        t, p = ttest_ind(otc_vals, ctrl_vals, equal_var=False)
        print(f"{cat} ({cat_type}): t = {t:.3f}, p = {p:.4f}")
    else:
        # Report the skip instead of silently dropping the category.
        print(f"{cat} ({cat_type}): not tested (n OTC = {len(otc_vals)}, n Control = {len(ctrl_vals)})")

print("\n" + "="*85)

SPATIAL DRIFT BY CATEGORY (Euclidean Distance)

Category   Type     OTC-intact             nonOTC-intact          Control               
-------------------------------------------------------------------------------------
face       Uni      6.6 ± 7.8 [n=6]        2.8 ± 5.0 [n=7]        1.4 ± 1.0 [n=7]       
word       Uni      13.7 ± 12.7 [n=6]      10.8 ± 8.1 [n=7]       14.2 ± 10.9 [n=7]     
object     Bil      5.2 ± 2.6 [n=6]        5.2 ± 6.8 [n=7]        9.1 ± 8.7 [n=14]      
house      Bil      15.4 ± 18.7 [n=6]      3.4 ± 2.3 [n=7]        9.7 ± 12.6 [n=14]     
-------------------------------------------------------------------------------------

SPATIAL DRIFT BY CATEGORY TYPE

Type         OTC-intact             nonOTC-intact          Control               
-------------------------------------------------------------------------------------
Unilateral   10.1 ± 10.7 [n=12]      6.8 ± 7.7 [n=14]      7.8 ± 10.0 [n=14]
Bilateral    10.3 ± 13.8 [n=12]      4.3 ± 5.0 [n=14]    

In [9]:
# =============================================================================
# CELL 5: DIRECTIONAL DRIFT - Extract Signed Coordinate Differences
# =============================================================================
# This extracts signed (T2 - T1) differences for each coordinate axis
# to test whether drift is SYSTEMATIC (directional) or just NOISE
# =============================================================================

def extract_directional_drift(peak_results, subjects_dict):
    """Extract signed peak displacements (last session minus first) per ROI.

    Parameters
    ----------
    peak_results : dict
        subject id -> {'<hemi>_<category>' -> {session -> {'peak_mni', 'peak_z', ...}}}.
        Session keys must be convertible with int().
    subjects_dict : dict
        subject id -> metadata dict; 'group', 'patient_status', 'code' and
        'hemi' are read when present.

    Returns
    -------
    pandas.DataFrame
        One row per ROI with at least two sessions: identifying columns, the
        T1/T2 coordinates, signed delta_x / delta_y / delta_z, euclidean_dist,
        the two peak z values and the session labels used as T1 and T2. The
        columns are present even when there are no rows. Subjects listed in
        EXCLUDE_SUBS are skipped.

    Notes
    -----
    Rows whose subject has patient_status == 'control' are labelled 'Control'
    (capitalised); all others keep their 'group' value.
    NOTE(review): delta_x is a difference along the world x axis. If that axis
    runs left-to-right (as in RAS+ coordinates — TODO confirm for these masks),
    a positive delta_x is a lateral shift in the right hemisphere but a medial
    shift in the left one, so pooling hemispheres mixes the two directions.
    """
    columns = ['subject', 'code', 'group', 'hemisphere', 'category', 'category_type',
               'intact_hemi',
               'x_T1', 'y_T1', 'z_T1', 'x_T2', 'y_T2', 'z_T2',
               'delta_x', 'delta_y', 'delta_z', 'euclidean_dist',
               't1_peak_z', 't2_peak_z', 'T1_session', 'T2_session']
    results = []

    for sid, rois in peak_results.items():
        if sid in EXCLUDE_SUBS:
            continue

        info = subjects_dict.get(sid, {})
        if info.get('patient_status') == 'control':
            group = 'Control'
        else:
            group = info.get('group', 'control')

        for roi_key, sessions_data in rois.items():
            # Session labels are strings: sort numerically, otherwise '10'
            # sorts before '2' and every signed delta changes sign.
            sessions = sorted(sessions_data.keys(), key=int)
            if len(sessions) < 2:
                continue

            hemi = roi_key.split('_')[0]
            cat = roi_key.split('_')[1]

            first, last = sessions_data[sessions[0]], sessions_data[sessions[-1]]
            peak_t1 = first['peak_mni']
            peak_t2 = last['peak_mni']

            # Signed differences (T2 - T1) along the three world axes
            dx = peak_t2[0] - peak_t1[0]
            dy = peak_t2[1] - peak_t1[1]
            dz = peak_t2[2] - peak_t1[2]

            # Euclidean distance
            euclidean = np.sqrt(dx**2 + dy**2 + dz**2)

            results.append({
                'subject': sid,
                'code': info.get('code', sid),
                'group': group,
                'hemisphere': hemi,
                'category': cat,
                'category_type': 'Bilateral' if cat in ['object', 'house'] else 'Unilateral',
                'intact_hemi': info.get('hemi', 'unknown'),
                'x_T1': peak_t1[0], 'y_T1': peak_t1[1], 'z_T1': peak_t1[2],
                'x_T2': peak_t2[0], 'y_T2': peak_t2[1], 'z_T2': peak_t2[2],
                'delta_x': dx,
                'delta_y': dy,
                'delta_z': dz,
                'euclidean_dist': euclidean,
                't1_peak_z': first['peak_z'],
                't2_peak_z': last['peak_z'],
                'T1_session': sessions[0],
                'T2_session': sessions[-1]
            })

    return pd.DataFrame(results, columns=columns)

# Build the signed-drift table and report how many ROIs each group contributes.
dir_drift_df = extract_directional_drift(peak_locations, ANALYSIS_SUBJECTS)

print(f"✓ Extracted {len(dir_drift_df)} ROIs with directional drift data")
for group_label in ('OTC', 'nonOTC', 'Control'):
    n_rois = int((dir_drift_df['group'] == group_label).sum())
    print(f"  {group_label}: {n_rois}")

dir_drift_df.head()

✓ Extracted 108 ROIs with directional drift data
  OTC: 24
  nonOTC: 28
  Control: 56


Unnamed: 0,subject,code,group,hemisphere,category,category_type,intact_hemi,x_T1,y_T1,z_T1,...,y_T2,z_T2,delta_x,delta_y,delta_z,euclidean_dist,t1_peak_z,t2_peak_z,T1_session,T2_session
0,sub-004,OTC004,OTC,l,face,Unilateral,l,-27.5,-32.0,-9.0,...,-52.0,0.0,-4.0,-20.0,9.0,22.293497,3.500539,13.334448,1,6
1,sub-004,OTC004,OTC,l,word,Unilateral,l,-29.5,-19.0,6.0,...,-18.0,1.0,4.0,1.0,-5.0,6.480741,4.249987,2.070664,1,6
2,sub-004,OTC004,OTC,l,object,Bilateral,l,-32.5,-75.0,13.0,...,-66.0,11.0,0.0,9.0,-2.0,9.219544,6.774813,11.288923,1,6
3,sub-004,OTC004,OTC,l,house,Bilateral,l,-19.5,-64.0,8.0,...,-42.0,10.0,-3.0,22.0,2.0,22.293497,5.253273,12.639606,1,6
4,sub-007,nonOTC007,nonOTC,r,face,Unilateral,r,34.5,-41.0,4.0,...,-42.0,5.0,1.0,-1.0,1.0,1.732051,9.558907,14.58206,1,4


In [10]:
# =============================================================================
# CELL 6: DIRECTIONAL DRIFT - Filter by Appropriate Hemisphere
# =============================================================================
# Same hemisphere rules as Euclidean analysis

# Filter function
def filter_by_hemisphere(df):
    """Keep only the rows allowed by the hemisphere selection rules.

    OTC and nonOTC rows are kept when 'hemisphere' equals 'intact_hemi'.
    Control rows are kept for face in 'r', word in 'l', and object/house in
    either hemisphere. Rows of any other group are dropped. The result is
    ordered OTC, nonOTC, then Control (face, word, object/house).
    """
    on_intact_side = df['hemisphere'] == df['intact_hemi']
    patient_parts = [
        df[(df['group'] == label) & on_intact_side]
        for label in ('OTC', 'nonOTC')
    ]

    controls = df[df['group'] == 'Control']
    is_right_face = (controls['category'] == 'face') & (controls['hemisphere'] == 'r')
    is_left_word = (controls['category'] == 'word') & (controls['hemisphere'] == 'l')
    is_bilateral = controls['category'].isin(['object', 'house'])
    control_part = pd.concat([
        controls[is_right_face],
        controls[is_left_word],
        controls[is_bilateral],
    ])

    return pd.concat(patient_parts + [control_part])

# Apply the hemisphere rules and report what is left in each group.
dir_drift_filtered = filter_by_hemisphere(dir_drift_df)

print(f"✓ After hemisphere filtering: {len(dir_drift_filtered)} measurements")
for group_label in ('OTC', 'nonOTC', 'Control'):
    n_kept = int((dir_drift_filtered['group'] == group_label).sum())
    print(f"  {group_label}: {n_kept}")

✓ After hemisphere filtering: 94 measurements
  OTC: 24
  nonOTC: 28
  Control: 42


In [11]:
# =============================================================================
# CELL 7: DIRECTIONAL DRIFT - Statistical Tests for Systematic Drift
# =============================================================================
# H0: Mean signed drift = 0 (peaks fluctuate randomly around a stable location)
# If p > 0.05 → no evidence of systematic drift (consistent with Dehaene-Lambertz);
#   note that a non-significant test alone does not demonstrate stability
# =============================================================================

def test_systematic_drift(df, group_col='group'):
    """
    Test whether drift is systematic (mean ≠ 0) for each group.
    """
    results = []
    coord_labels = {'delta_x': 'Medial-Lateral', 'delta_y': 'Anterior-Posterior', 'delta_z': 'Superior-Inferior'}
    
    for group in df[group_col].unique():
        group_data = df[df[group_col] == group]
        
        for coord in ['delta_x', 'delta_y', 'delta_z']:
            values = group_data[coord].dropna()
            n = len(values)
            
            if n < 3:
                continue
            
            # One-sample t-test: H0 = mean is 0
            t_stat, p_val = ttest_1samp(values, 0)
            
            # Effect size (Cohen's d)
            cohens_d = values.mean() / values.std() if values.std() > 0 else 0
            
            # 95% CI
            sem = values.sem()
            ci_low = values.mean() - 1.96 * sem
            ci_high = values.mean() + 1.96 * sem
            
            results.append({
                'Group': group,
                'Axis': coord_labels[coord],
                'n': n,
                'Mean (mm)': round(values.mean(), 2),
                'SD': round(values.std(), 2),
                '95% CI': f"[{ci_low:.1f}, {ci_high:.1f}]",
                't': round(t_stat, 2),
                'p': round(p_val, 4),
                "Cohen's d": round(cohens_d, 2),
                'Systematic?': 'Yes*' if p_val < 0.05 else 'No'
            })
    
    return pd.DataFrame(results)

# Compute the per-group, per-axis one-sample tests and print them as a table.
drift_stats = test_systematic_drift(dir_drift_filtered)

banner = "=" * 90
print(banner)
print("TEST FOR SYSTEMATIC DRIFT (One-sample t-test: H0 = mean drift is 0)")
print(banner)
print("\nIf p > 0.05: No systematic drift → CONSISTENT with literature")
print("If p < 0.05: Systematic drift in that direction\n")
print(drift_stats.to_string(index=False))
print("\n" + banner)

TEST FOR SYSTEMATIC DRIFT (One-sample t-test: H0 = mean drift is 0)

If p > 0.05: No systematic drift → CONSISTENT with literature
If p < 0.05: Systematic drift in that direction

  Group               Axis  n  Mean (mm)    SD      95% CI     t      p  Cohen's d Systematic?
    OTC     Medial-Lateral 24      -0.36  3.94 [-1.9, 1.2] -0.44 0.6612      -0.09          No
    OTC Anterior-Posterior 24       4.15 14.40 [-1.6, 9.9]  1.41 0.1718       0.29          No
    OTC  Superior-Inferior 24       0.20  3.81 [-1.3, 1.7]  0.26 0.8009       0.05          No
 nonOTC     Medial-Lateral 28      -0.75  4.36 [-2.4, 0.9] -0.92 0.3680      -0.17          No
 nonOTC Anterior-Posterior 28       1.42  6.71 [-1.1, 3.9]  1.12 0.2721       0.21          No
 nonOTC  Superior-Inferior 28      -0.12  2.71 [-1.1, 0.9] -0.24 0.8121      -0.05          No
Control     Medial-Lateral 42       1.44  6.16 [-0.4, 3.3]  1.52 0.1374       0.23          No
Control Anterior-Posterior 42      -1.57 10.61 [-4.8, 1.6] -

In [12]:
# =============================================================================
# CELL 8: DIRECTIONAL DRIFT - Test by Category
# =============================================================================

def test_drift_by_group_category(df):
    """Test systematic drift for each group × category combination."""
    results = []
    
    for group in ['OTC', 'nonOTC', 'Control']:
        for category in ['face', 'word', 'object', 'house']:
            subset = df[(df['group'] == group) & (df['category'] == category)]
            
            if len(subset) < 3:
                continue
            
            row = {'Group': group, 'Category': category, 'n': len(subset)}
            
            for coord, label in [('delta_x', 'Δx'), ('delta_y', 'Δy'), ('delta_z', 'Δz')]:
                values = subset[coord].dropna()
                if len(values) >= 3:
                    t_stat, p_val = ttest_1samp(values, 0)
                    sig = '*' if p_val < 0.05 else ''
                    row[f'{label} mean'] = f"{values.mean():.1f}{sig}"
                else:
                    row[f'{label} mean'] = 'n/a'
            
            results.append(row)
    
    return pd.DataFrame(results)

# Tabulate the group × category means and print them under a one-line heading.
category_drift = test_drift_by_group_category(dir_drift_filtered)
print(
    "\nSYSTEMATIC DRIFT BY GROUP × CATEGORY (* = p < 0.05):",
    category_drift.to_string(index=False),
    sep="\n",
)


SYSTEMATIC DRIFT BY GROUP × CATEGORY (* = p < 0.05):
  Group Category  n Δx mean Δy mean Δz mean
    OTC     face  6    -0.3    -2.4     0.8
    OTC     word  6     2.6     7.9    -0.2
    OTC   object  6     0.2     1.7    -0.9
    OTC    house  6    -3.8     9.3    1.0*
 nonOTC     face  7     1.1     1.7     0.2
 nonOTC     word  7    -4.0     2.9    -0.4
 nonOTC   object  7     0.4     2.2    -0.4
 nonOTC    house  7    -0.6    -1.1     0.0
Control     face  7     0.1     0.7     0.1
Control     word  7    -0.1    -2.0    -2.9
Control   object 14     3.0     0.9    -0.6
Control    house 14     1.3    -5.0     1.6


In [13]:
# =============================================================================
# CELL 9: DIRECTIONAL DRIFT - ANOVA: Do Groups Differ in Drift Direction?
# =============================================================================

rule = "=" * 80
print("\n" + rule)
print("ANOVA: Do groups differ in drift DIRECTION?")
print(rule)

axis_names = {
    'delta_x': 'Medial-Lateral',
    'delta_y': 'Anterior-Posterior',
    'delta_z': 'Superior-Inferior',
}

for coord, label in axis_names.items():

    # Collect the non-missing values of this axis for every non-empty group.
    per_group = {}
    for g in ['OTC', 'nonOTC', 'Control']:
        vals = dir_drift_filtered.loc[dir_drift_filtered['group'] == g, coord].dropna()
        if len(vals) > 0:
            per_group[g] = vals
    group_names = list(per_group.keys())
    groups_data = list(per_group.values())

    if len(groups_data) < 2:
        continue

    f_stat, p_val = f_oneway(*groups_data)
    significant = p_val < 0.05
    result = '→ GROUPS DIFFER' if significant else '→ No group differences'
    print(f"\n{label} (Δ{coord[-1]}):")
    print(f"  F = {f_stat:.3f}, p = {p_val:.4f} {result}")

    # Pairwise follow-up tests only after a significant omnibus test.
    if not significant:
        continue
    for i in range(len(group_names)):
        for j in range(i + 1, len(group_names)):
            g1, g2 = group_names[i], group_names[j]
            t, p = ttest_ind(per_group[g1], per_group[g2])
            sig = '*' if p < 0.05 else ''
            print(f"    {g1} vs {g2}: t={t:.2f}, p={p:.4f}{sig}")


ANOVA: Do groups differ in drift DIRECTION?

Medial-Lateral (Δx):
  F = 1.800, p = 0.1711 → No group differences

Anterior-Posterior (Δy):
  F = 2.211, p = 0.1154 → No group differences

Superior-Inferior (Δz):
  F = 0.043, p = 0.9583 → No group differences


In [None]:
# =============================================================================
# CELL 10: VISUALIZATION - Box Plots of Signed Drift
# =============================================================================

fig, axes = plt.subplots(1, 3, figsize=(14, 5))

coord_info = [
    ('delta_x', 'Δx: Medial ← → Lateral'),
    ('delta_y', 'Δy: Posterior ← → Anterior'), 
    ('delta_z', 'Δz: Inferior ← → Superior')
]

group_order = ['OTC', 'nonOTC', 'Control']
palette = {'OTC': 'coral', 'nonOTC': 'skyblue', 'Control': 'lightgreen'}

for ax, (coord, title) in zip(axes, coord_info):
    # Arguments shared by the box layer and the overlaid individual points.
    shared = dict(data=dir_drift_filtered, x='group', y=coord, order=group_order, ax=ax)
    sns.boxplot(palette=palette, **shared)
    sns.stripplot(color='black', alpha=0.5, size=4, **shared)

    # Dashed line at 0 marks "no systematic drift".
    ax.axhline(0, color='red', linestyle='--', linewidth=2, alpha=0.7)

    axis_symbol = title.split(":")[0]
    ax.set_xlabel('')
    ax.set_ylabel(f'{axis_symbol} (mm)')
    ax.set_title(title)

    # Write mean±SEM above each group's box.
    for i, group in enumerate(group_order):
        grp_data = dir_drift_filtered.loc[dir_drift_filtered['group'] == group, coord]
        if grp_data.empty:
            continue
        mean = grp_data.mean()
        sem = grp_data.sem()
        text_y = ax.get_ylim()[1] * 0.85
        ax.annotate(f'{mean:.1f}±{sem:.1f}',
                    xy=(i, text_y),
                    ha='center', fontsize=9, fontweight='bold')

plt.suptitle('Signed Coordinate Drift by Group\n(Centered on 0 = no systematic directional drift)', 
             fontsize=12, y=1.02)
plt.tight_layout()
plt.savefig('directional_drift_boxplots.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# =============================================================================
# CELL 11: VISUALIZATION - Arrow Plot Showing Drift Directions
# =============================================================================

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

category_colors = {'face': '#FFB347', 'word': '#6495ED', 
                   'object': '#40E0D0', 'house': '#B39EB5'}

for ax, group in zip(axes, ['OTC', 'nonOTC', 'Control']):
    group_data = dir_drift_filtered[dir_drift_filtered['group'] == group]

    # One arrow per measurement, from the origin to (Δy, Δz), coloured by category.
    for dy, dz, cat in zip(group_data['delta_y'], group_data['delta_z'], group_data['category']):
        shade = category_colors.get(cat, 'gray')
        ax.arrow(0, 0, dy, dz,
                 head_width=0.8, head_length=0.5,
                 fc=shade, ec=shade,
                 alpha=0.6, linewidth=1.5)

    # Dashed reference circle with a 10 mm radius.
    circle = plt.Circle((0, 0), 10, fill=False, linestyle='--',
                        color='gray', alpha=0.5, linewidth=2)
    ax.add_patch(circle)

    # Faint cross-hair through the origin.
    for draw_line in (ax.axhline, ax.axvline):
        draw_line(0, color='black', linewidth=0.5, alpha=0.3)

    ax.set_xlim(-30, 30)
    ax.set_ylim(-30, 30)
    ax.set_aspect('equal')
    ax.set_xlabel('Δy: Anterior-Posterior (mm)')
    ax.set_ylabel('Δz: Superior-Inferior (mm)')
    ax.set_title(f'{group} (n={len(group_data)})')

# Legend: one marker per category, placed to the right of the panels.
handles = []
for cat, c in category_colors.items():
    handles.append(plt.Line2D([0], [0], color=c, marker='>', linestyle='',
                              markersize=10, label=cat.title()))
fig.legend(handles=handles, loc='center right', bbox_to_anchor=(1.08, 0.5))

plt.suptitle('Drift Direction (Y-Z plane)\nArrows show T2 - T1 displacement\n'
             '(Random directions = no systematic drift)', fontsize=11, y=1.05)
plt.tight_layout()
plt.savefig('directional_drift_arrows.png', dpi=150, bbox_inches='tight')
plt.show()

print("Interpretation: Random arrow directions → measurement noise (matches literature)")
print("               Clustered arrows → systematic spatial shift")

In [None]:
# =============================================================================
# CELL 12: VISUALIZATION - Signed vs Unsigned Comparison
# =============================================================================

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
group_sequence = ['OTC', 'nonOTC', 'Control']

# Left panel: unsigned (Euclidean) drift per group.
# `palette` is the colour mapping defined in the box-plot cell above.
ax = axes[0]
sns.boxplot(data=dir_drift_filtered, x='group', y='euclidean_dist',
            order=group_sequence,
            palette=palette, ax=ax)
ax.set_ylabel('Euclidean Distance (mm)')
ax.set_xlabel('')
ax.set_title('Unsigned Drift (Euclidean)\nAlways positive - measures variability')

# Right panel: mean signed drift per axis, one bar cluster per group.
ax = axes[1]
summary = (
    dir_drift_filtered
    .groupby('group')[['delta_x', 'delta_y', 'delta_z']]
    .mean()
    .reindex(group_sequence)
)
summary.plot(kind='bar', ax=ax, width=0.8,
             color=['#e74c3c', '#3498db', '#2ecc71'])
ax.axhline(0, color='black', linewidth=1.5)
ax.set_ylabel('Mean Signed Drift (mm)')
ax.set_xlabel('')
ax.set_title('Mean Signed Drift by Axis\nShould be ~0 if no systematic direction')
ax.set_xticklabels(['OTC', 'nonOTC', 'Control'], rotation=0)
ax.legend(title='Axis', labels=['M-L', 'A-P', 'S-I'])

plt.tight_layout()
plt.savefig('signed_vs_unsigned_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

In [17]:
# =============================================================================
# CELL 13: Summary and Interpretation
# =============================================================================

# Print a banner for the final summary section.
print("\n" + "=" * 80)
print("SUMMARY: Directional Drift Analysis")
print("=" * 80)

# Summary table
# Per-group mean and SD of each signed axis, plus mean / SD / row count of the
# Euclidean distance, rounded to two decimals.
# NOTE(review): the name `summary` is also assigned in the plotting cell above
# (there it holds per-group means only); this assignment replaces it.
summary = dir_drift_filtered.groupby('group').agg({
    'delta_x': ['mean', 'std'],
    'delta_y': ['mean', 'std'],
    'delta_z': ['mean', 'std'],
    'euclidean_dist': ['mean', 'std', 'count']
}).round(2)

print("\nMean ± SD by Group:")
print(summary)

# Fixed interpretation text. It is a static string: none of the numbers or
# conclusions in it are computed from the data in this notebook.
print("""
================================================================================
INTERPRETATION
================================================================================

1. SIGNED DRIFT (delta_x, delta_y, delta_z):
   - If means ≈ 0 with p > 0.05: Peaks fluctuate randomly around stable location
   - This matches Dehaene-Lambertz (2018): "no linear effect of session" (F < 1)

2. UNSIGNED DRIFT (Euclidean distance):
   - Your original values (5-15mm) reflect VARIABILITY/noise
   - Expected even with perfectly stable peaks due to:
     • Voxel resolution (3mm isotropic)
     • Registration noise (1-3mm)
     • Threshold effects on peak voxel selection

3. KEY FINDING:
   - If signed means ≈ 0 AND p > 0.05: Results MATCH the literature
   - Euclidean distances measure variability, not systematic drift
   - Both measures are consistent - just answering different questions

4. OTC vs CONTROLS:
   - If no group differences in signed drift direction → OTC peaks are as
     spatially stable as Controls (reorganization occurs WITHIN stable locations)
================================================================================
""")

# Save results
# Writes the hemisphere-filtered measurements and the per-group/axis test table
# as CSV files in the current working directory.
dir_drift_filtered.to_csv('directional_drift_results.csv', index=False)
drift_stats.to_csv('directional_drift_statistics.csv', index=False)
print("✓ Saved: directional_drift_results.csv, directional_drift_statistics.csv")


SUMMARY: Directional Drift Analysis

Mean ± SD by Group:
        delta_x       delta_y        delta_z       euclidean_dist             
           mean   std    mean    std    mean   std           mean    std count
group                                                                         
Control    1.44  6.16   -1.57  10.61   -0.10  5.65           8.84  10.34    42
OTC       -0.36  3.94    4.15  14.40    0.20  3.81          10.24  12.10    24
nonOTC    -0.75  4.36    1.42   6.71   -0.12  2.71           5.56   6.48    28

INTERPRETATION

1. SIGNED DRIFT (delta_x, delta_y, delta_z):
   - If means ≈ 0 with p > 0.05: Peaks fluctuate randomly around stable location
   - This matches Dehaene-Lambertz (2018): "no linear effect of session" (F < 1)

2. UNSIGNED DRIFT (Euclidean distance):
   - Your original values (5-15mm) reflect VARIABILITY/noise
   - Expected even with perfectly stable peaks due to:
     • Voxel resolution (3mm isotropic)
     • Registration noise (1-3mm)
     • Thresh