In [40]:
import pandas as pd
import numpy as np

# ============================================================
# 1. CONFIGURATION & DATA LOADING
# ============================================================
FILE_PATH = '/user_data/csimmon2/git_repos/long_pt/results_final.csv'

# Measure column name in the loaded data -> short label used in printed tables.
MEASURES = {
    'Selectivity_Change': 'Selectivity',
    'Geometry_Preservation_6mm': 'Geometry',
    'Spatial_Relocation_mm': 'Spatial Drift',
    'MDS_Shift': 'MDS'
}

# Load Data
try:
    df = pd.read_csv(FILE_PATH)
except FileNotFoundError:
    print(f"Error: Could not find '{FILE_PATH}'. Please check the file name and path.")
    # Re-raise so the run stops here. Swallowing the error would leave `df`
    # undefined (or stale from an earlier run) and the analysis below would
    # fail later with an unrelated error or silently use old data.
    raise

# Cleanup empty columns if they exist (errors='ignore' makes this a no-op
# when the column is absent).
df = df.drop(columns=['Unnamed: 22'], errors='ignore')
print(f"Data loaded successfully. Found {len(df)} rows.")

# ============================================================
# 2. DATA PROCESSING FUNCTIONS
# ============================================================

def process_data_avg_controls(df, measures):
    """
    Collapse control rows to one row per (Subject, Category) and append them
    to the untouched patient rows.

    Args:
        df: frame with 'Subject', 'Group', 'Category', 'Category_Type',
            'age_1', 'yr_gap' and every column named in `measures`.
        measures: list of measure column names to carry through.

    Returns:
        DataFrame with columns Subject, Group, Category, Category_Type,
        the measures, age_1 and yr_gap; patient rows first, then the
        averaged control rows, with a fresh 0..n-1 index.
    """
    keys = ['Subject', 'Category']
    value_cols = measures + ['age_1', 'yr_gap']
    out_cols = ['Subject', 'Group', 'Category', 'Category_Type'] + value_cols

    # Rows whose Group is exactly 'control' are averaged; everything else
    # is treated as a patient row and passed through unchanged.
    is_control = df['Group'] == 'control'
    ctrl_rows = df[is_control].copy()
    patient_rows = df[~is_control].copy()

    per_subject = ctrl_rows.groupby(keys)
    # Mean over the multiple control rows sharing a Subject/Category pair.
    averaged = per_subject[value_cols].mean().reset_index()
    # The labels are taken from the first row of each group.
    labels = per_subject[['Group', 'Category_Type']].first().reset_index()
    ctrl_final = averaged.merge(labels, on=keys)

    return pd.concat(
        [patient_rows[out_cols], ctrl_final[out_cols]],
        ignore_index=True,
    )

def compute_instability_gap(df, measure):
    """
    Calculates the per-subject 'Instability Gap' for Q1.

    Formula: Mean(Bilateral categories) - Mean(Unilateral categories), where
    the Bilateral/Unilateral label of each row is read from the
    'Category_Type' column (per the original notes: Bilateral = House,
    Object; Unilateral = Face, Word).

    Args:
        df: frame with 'Subject', 'Group', 'Category', 'Category_Type' and
            the `measure` column.
        measure: name of the column to average.

    Returns:
        DataFrame with one row per (Subject, Group) holding the per-type
        means and their difference in 'Gap'; subjects lacking either type
        are dropped. When one of the two types is absent from the data
        altogether, an empty frame that still carries the 'Subject',
        'Group', 'Bilateral', 'Unilateral' and 'Gap' columns is returned,
        so callers can filter on 'Group' without a KeyError.
    """
    # Filter for the 4 core categories
    core = df[df['Category'].isin(['Face', 'House', 'Object', 'Word'])]

    # Mean per Subject & Category Type (Bilateral vs Unilateral)
    subj_means = core.groupby(['Subject', 'Group', 'Category_Type'])[measure].mean().reset_index()

    # Pivot to columns: [Bilateral, Unilateral]
    pivot = subj_means.pivot(index=['Subject', 'Group'], columns='Category_Type', values=measure).reset_index()

    if 'Bilateral' not in pivot.columns or 'Unilateral' not in pivot.columns:
        # Keep the schema on the empty result; a bare pd.DataFrame() has no
        # 'Group'/'Gap' columns and breaks downstream column lookups.
        return pd.DataFrame(columns=['Subject', 'Group', 'Bilateral', 'Unilateral', 'Gap'])

    pivot['Gap'] = pivot['Bilateral'] - pivot['Unilateral']
    return pivot.dropna(subset=['Gap'])

# ============================================================
# 3. STATISTICAL ENGINE (BOOTSTRAP)
# ============================================================

def bootstrap_analysis(group_a, group_b, n_boot=100000, seed=42):
    """
    Non-Parametric Bootstrap to compare Group A vs Group B.

    Each group is resampled with replacement at its own size, `n_boot`
    times, and the difference of resampled means (A - B) is collected.

    Args:
        group_a, group_b: 1-D array-likes of observations (non-empty).
        n_boot: number of bootstrap replicates.
        seed: seed of the private random stream used for resampling.

    Returns:
        (obs_diff, ci_low, ci_high, prob):
        - obs_diff: mean(group_a) - mean(group_b) on the observed data
        - ci_low, ci_high: 2.5th and 97.5th percentiles of the bootstrap
          differences (95% percentile interval)
        - prob: fraction of bootstrap differences above zero when obs_diff
          is positive, otherwise the fraction below zero
          ("probability of direction")
    """
    # A private RandomState produces exactly the draws that
    # np.random.seed(seed) + np.random.choice would, so reported numbers are
    # unchanged, but the global NumPy RNG is no longer reseeded as a side
    # effect of every call.
    rng = np.random.RandomState(seed)
    n_a = len(group_a)
    n_b = len(group_b)

    obs_diff = np.mean(group_a) - np.mean(group_b)

    # One row per bootstrap replicate, so a single axis=1 mean yields every
    # resampled group mean at once.
    sample_a = rng.choice(group_a, size=(n_boot, n_a), replace=True)
    sample_b = rng.choice(group_b, size=(n_boot, n_b), replace=True)
    boot_diffs = np.mean(sample_a, axis=1) - np.mean(sample_b, axis=1)

    # 95% CI
    ci_low, ci_high = np.percentile(boot_diffs, [2.5, 97.5])

    # Probability of Direction: share of replicates on the observed side of 0
    if obs_diff > 0:
        prob = np.mean(boot_diffs > 0)
    else:
        prob = np.mean(boot_diffs < 0)

    return obs_diff, ci_low, ci_high, prob

# ============================================================
# 4. RUN ANALYSIS
# ============================================================

# Step A: Process Data
print("Processing data (Averaging Controls)...")
df_clean = process_data_avg_controls(df, list(MEASURES.keys()))

# --- Q1 ANALYSIS: THE INSTABILITY GAP ---
# Per-subject gap, bootstrapped for OTC patients against controls and
# against nonOTC patients, once per measure.
print("\n" + "="*100)
print("Q1: MAIN EFFECT - THE INSTABILITY GAP")
print("Definition: Mean(House, Object) - Mean(Face, Word)")
print("Hypothesis: OTC Patients > Controls & nonOTC")
print("="*100)
print(f"{'Measure':<15} {'Compar.':<12} {'OTC Gap':<8} {'Ref Gap':<8} {'Diff':<8} {'95% CI':<20} {'Prob.Dir.':<10} {'Sig'}")
print("-" * 100)

for col, name in MEASURES.items():
    gaps = compute_instability_gap(df_clean, col)

    # compute_instability_gap may hand back an empty frame without a
    # 'Group' column; skip the measure instead of failing on the lookup.
    if gaps.empty:
        continue

    otc = gaps[gaps['Group'] == 'OTC']['Gap'].values
    ctrl = gaps[gaps['Group'] == 'control']['Gap'].values
    nonotc = gaps[gaps['Group'] == 'nonOTC']['Gap'].values

    comparisons = [('vs Control', ctrl), ('vs nonOTC', nonotc)]

    for comp_name, comp_data in comparisons:
        if len(otc) == 0 or len(comp_data) == 0:
            continue

        diff, low, high, prob = bootstrap_analysis(otc, comp_data)
        # low <= high (they are percentiles), so the interval excludes zero
        # exactly when it lies entirely above or entirely below it.
        is_sig = "*" if (low > 0 or high < 0) else "ns"

        # Pad the interval to the header's 20-character column so the
        # Prob.Dir. and Sig columns line up under their headings.
        ci = f"[{low:.3f}, {high:.3f}]"
        print(f"{name:<15} {comp_name:<12} {np.mean(otc):<8.3f} {np.mean(comp_data):<8.3f} {diff:<8.3f} {ci:<20} {prob:<10.4f} {is_sig}")

# --- Q2 ANALYSIS: CATEGORY SPECIFICITY ---
print("\n" + "="*100)
print("Q2: CATEGORY SPECIFICITY")
print("Hypothesis: Deficit is specific to Objects (OTC-dependent) and maybe Houses")
print("="*100)
print(f"{'Measure':<15} {'Category':<10} {'Compar.':<12} {'Diff':<8} {'95% CI':<20} {'Prob.Dir.':<10} {'Sig'}")
print("-" * 100)

categories = ['Face', 'Word', 'Object', 'House'] # Sorted logically

for col, name in MEASURES.items():
    print(f"--- {name} ---")
    for cat in categories:
        df_cat = df_clean[df_clean['Category'] == cat]

        # Drop missing values first: a single NaN would turn the observed
        # difference and the whole bootstrap interval into NaN.
        otc = df_cat[df_cat['Group'] == 'OTC'][col].dropna().values
        # Only the OTC vs nonOTC contrast is reported here; a control
        # comparison can be added with a second loop if desired.
        nonotc = df_cat[df_cat['Group'] == 'nonOTC'][col].dropna().values

        if len(otc) == 0 or len(nonotc) == 0:
            continue

        diff, low, high, prob = bootstrap_analysis(otc, nonotc)
        is_sig = "*" if (low > 0 or high < 0) else "ns"

        # Every category is printed, significant or not.
        ci = f"[{low:.3f}, {high:.3f}]"
        print(f"{name:<15} {cat:<10} {'vs nonOTC':<12} {diff:<8.3f} {ci:<20} {prob:<10.4f} {is_sig}")

Data loaded successfully. Found 132 rows.
Processing data (Averaging Controls)...

Q1: MAIN EFFECT - THE INSTABILITY GAP
Definition: Mean(House, Object) - Mean(Face, Word)
Hypothesis: OTC Patients > Controls & nonOTC
Measure         Compar.      OTC Gap  Ref Gap  Diff     95% CI               Prob.Dir.  Sig
----------------------------------------------------------------------------------------------------
Selectivity     vs Control   0.275    0.109    0.166    [0.019, 0.337]   0.9882     *
Selectivity     vs nonOTC    0.275    0.008    0.267    [0.132, 0.428]   1.0000     *
Geometry        vs Control   -0.289   -0.096   -0.193   [-0.340, -0.043]   0.9939     *
Geometry        vs nonOTC    -0.289   -0.036   -0.253   [-0.404, -0.103]   0.9995     *
Spatial Drift   vs Control   -5.535   -0.344   -5.191   [-13.376, 2.799]   0.8928     ns
Spatial Drift   vs nonOTC    -5.535   -1.826   -3.709   [-11.513, 3.960]   0.8206     ns
MDS             vs Control   -0.023   0.050    -0.073   [-0.142,

In [41]:
import pandas as pd
import numpy as np

# ============================================================
# 1. CONFIGURATION & DATA LOADING
# ============================================================
FILE_PATH = '/user_data/csimmon2/git_repos/long_pt/results_final.csv'

# Measure column name -> short label used in the printed tables.
MEASURES = {
    'Selectivity_Change': 'Selectivity',
    'Geometry_Preservation_6mm': 'Geometry',
    'Spatial_Relocation_mm': 'Spatial Drift',
    'MDS_Shift': 'MDS',
}

# Category lateralization (for Ayzenberg-style analysis):
# category -> hemisphere treated as preferred, or None when bilateral.
CATEGORY_PREFERRED_HEMI = {
    'Face': 'R',     # Faces typically right-lateralized
    'Word': 'L',     # Words typically left-lateralized
    'House': None,   # Bilateral - no preferred
    'Object': None,  # Bilateral - no preferred
}

# Load Data; the stray 'Unnamed: 22' column is removed when present and
# ignored when absent.
df = pd.read_csv(FILE_PATH).drop(columns=['Unnamed: 22'], errors='ignore')
print(f"Data loaded: {len(df)} rows")

# ============================================================
# 2. DATA PROCESSING FUNCTIONS
# ============================================================

def process_data_avg_controls(df, measures):
    """
    Collapse control rows to a single averaged row per Subject per Category
    and stack them beneath the unchanged patient rows.

    Args:
        df: frame with 'Subject', 'Group', 'Category', 'Category_Type',
            'age_1', 'yr_gap' and every column named in `measures`.
        measures: list of measure column names to carry through.

    Returns:
        DataFrame with columns Subject, Group, Category, Category_Type,
        the measures, age_1 and yr_gap; patients first, averaged controls
        after, re-indexed from 0.
    """
    keys = ['Subject', 'Category']
    value_cols = measures + ['age_1', 'yr_gap']
    out_cols = ['Subject', 'Group', 'Category', 'Category_Type'] + value_cols

    is_control = df['Group'] == 'control'
    ctrl_rows = df[is_control].copy()
    patient_rows = df[~is_control].copy()

    per_subject = ctrl_rows.groupby(keys)
    # Numeric columns are averaged over the control rows of each group;
    # the label columns come from the group's first row.
    averaged = per_subject[value_cols].mean().reset_index()
    labels = per_subject[['Group', 'Category_Type']].first().reset_index()
    ctrl_final = averaged.merge(labels, on=keys)

    return pd.concat(
        [patient_rows[out_cols], ctrl_final[out_cols]],
        ignore_index=True,
    )


def get_controls_by_hemisphere(df, category, hemisphere):
    """
    Return the control rows of `df` for one category and one hemisphere.

    A row is kept when its 'Group' is 'control', its 'Category' equals
    `category` and its 'nonpt_hemi' equals `hemisphere`. All columns and
    the original index labels are preserved.
    """
    is_control = df['Group'] == 'control'
    in_category = df['Category'] == category
    in_hemisphere = df['nonpt_hemi'] == hemisphere
    return df[is_control & in_category & in_hemisphere]


def compute_instability_gap(df, measure):
    """
    Calculates Mean(Bilateral) - Mean(Unilateral) per subject.

    Rows are limited to the Face/House/Object/Word categories; the
    Bilateral/Unilateral label of each row is read from 'Category_Type'.

    Args:
        df: frame with 'Subject', 'Group', 'Category', 'Category_Type' and
            the `measure` column.
        measure: name of the column to average.

    Returns:
        DataFrame with one row per (Subject, Group) holding the per-type
        means and their difference in 'Gap'; subjects lacking either type
        are dropped. When one of the two types is absent from the data
        altogether, an empty frame that still carries the 'Subject',
        'Group', 'Bilateral', 'Unilateral' and 'Gap' columns is returned,
        so callers can filter on 'Group' without a KeyError.
    """
    core = df[df['Category'].isin(['Face', 'House', 'Object', 'Word'])]
    subj_means = core.groupby(['Subject', 'Group', 'Category_Type'])[measure].mean().reset_index()
    pivot = subj_means.pivot(index=['Subject', 'Group'], columns='Category_Type', values=measure).reset_index()

    if 'Bilateral' not in pivot.columns or 'Unilateral' not in pivot.columns:
        # Keep the schema on the empty result; a bare pd.DataFrame() has no
        # 'Group'/'Gap' columns and breaks downstream column lookups.
        return pd.DataFrame(columns=['Subject', 'Group', 'Bilateral', 'Unilateral', 'Gap'])

    pivot['Gap'] = pivot['Bilateral'] - pivot['Unilateral']
    return pivot.dropna(subset=['Gap'])

# ============================================================
# 3. STATISTICAL ENGINE (BOOTSTRAP)
# ============================================================

def bootstrap_analysis(group_a, group_b, n_boot=100000, seed=42):
    """
    Non-parametric bootstrap comparison.

    Each group is resampled with replacement at its own size, `n_boot`
    times, and the difference of resampled means (A - B) is collected.

    Args:
        group_a, group_b: 1-D array-likes of observations (non-empty).
        n_boot: number of bootstrap replicates.
        seed: seed of the private random stream used for resampling.

    Returns: observed diff, 95% CI bounds (2.5th / 97.5th percentiles of
    the bootstrap differences), probability of direction (fraction of
    bootstrap differences above zero when the observed diff is positive,
    otherwise the fraction below zero).
    """
    # A private RandomState produces exactly the draws that
    # np.random.seed(seed) + np.random.choice would, so reported numbers are
    # unchanged, but the global NumPy RNG is no longer reseeded as a side
    # effect of every call.
    rng = np.random.RandomState(seed)
    n_a, n_b = len(group_a), len(group_b)

    obs_diff = np.mean(group_a) - np.mean(group_b)

    # One row per bootstrap replicate.
    sample_a = rng.choice(group_a, size=(n_boot, n_a), replace=True)
    sample_b = rng.choice(group_b, size=(n_boot, n_b), replace=True)
    boot_diffs = np.mean(sample_a, axis=1) - np.mean(sample_b, axis=1)

    ci_low, ci_high = np.percentile(boot_diffs, [2.5, 97.5])

    prob = np.mean(boot_diffs > 0) if obs_diff > 0 else np.mean(boot_diffs < 0)

    return obs_diff, ci_low, ci_high, prob


def format_result(name, comparison, mean_a, mean_b, diff, ci_low, ci_high, prob, n_a, n_b):
    """
    Render one comparison as a fixed-width table row.

    The row holds the measure name, comparison label, both group means,
    their difference, the bracketed CI, the probability of direction, the
    two sample sizes and a trailing "*" when both CI bounds lie strictly
    on the same side of zero (empty otherwise).
    """
    bounds = (ci_low, ci_high)
    same_side = all(b > 0 for b in bounds) or all(b < 0 for b in bounds)
    marker = "*" if same_side else ""

    # Leading columns are separated by one space, trailing ones by two.
    head = " ".join([
        f"{name:<12}",
        f"{comparison:<20}",
        f"{mean_a:>6.3f}",
        f"{mean_b:>6.3f}",
        f"{diff:>7.3f}",
    ])
    interval = f"[{ci_low:>6.3f}, {ci_high:>6.3f}]"
    return "  ".join([head, interval, f"{prob:.3f}", f"n={n_a},{n_b}", marker])

# ============================================================
# 4. RUN ANALYSES
# ============================================================

# Process data (averaged controls)
df_clean = process_data_avg_controls(df, list(MEASURES.keys()))

# --- Q1: INSTABILITY GAP ---
# Per-subject gap, bootstrapped for OTC patients against controls and
# against nonOTC patients, once per measure.
print("\n" + "="*95)
print("Q1: INSTABILITY GAP  [Mean(House,Object) - Mean(Face,Word)]")
print("="*95)
print(f"{'Measure':<12} {'Comparison':<20} {'OTC':>6} {'Ref':>6} {'Diff':>7}  {'95% CI':<17}  {'P(d)':>5}  {'n':>8}  Sig")
print("-"*95)

for col, name in MEASURES.items():
    gaps = compute_instability_gap(df_clean, col)

    # compute_instability_gap may hand back an empty frame without a
    # 'Group' column; skip the measure instead of failing on the lookup.
    if gaps.empty:
        continue

    otc = gaps[gaps['Group'] == 'OTC']['Gap'].values
    ctrl = gaps[gaps['Group'] == 'control']['Gap'].values
    nonotc = gaps[gaps['Group'] == 'nonOTC']['Gap'].values

    for comp_name, comp_data in [('vs Control', ctrl), ('vs nonOTC', nonotc)]:
        if len(otc) == 0 or len(comp_data) == 0:
            continue
        diff, low, high, prob = bootstrap_analysis(otc, comp_data)
        print(format_result(name, comp_name, np.mean(otc), np.mean(comp_data),
                           diff, low, high, prob, len(otc), len(comp_data)))


# --- Q2: CATEGORY SPECIFICITY ---
print("\n" + "="*95)
print("Q2: CATEGORY SPECIFICITY (OTC vs nonOTC)")
print("="*95)
print(f"{'Measure':<12} {'Category':<20} {'OTC':>6} {'Ref':>6} {'Diff':>7}  {'95% CI':<17}  {'P(d)':>5}  {'n':>8}  Sig")
print("-"*95)

for col, name in MEASURES.items():
    for cat in ['Face', 'Word', 'Object', 'House']:
        df_cat = df_clean[df_clean['Category'] == cat]

        # Drop missing values first: a single NaN would turn the means and
        # the whole bootstrap interval into NaN, and would inflate n.
        otc = df_cat[df_cat['Group'] == 'OTC'][col].dropna().values
        nonotc = df_cat[df_cat['Group'] == 'nonOTC'][col].dropna().values

        if len(otc) == 0 or len(nonotc) == 0:
            continue

        diff, low, high, prob = bootstrap_analysis(otc, nonotc)
        print(format_result(name, cat, np.mean(otc), np.mean(nonotc),
                           diff, low, high, prob, len(otc), len(nonotc)))
    print()


# --- Q3: AYZENBERG-STYLE HEMISPHERE ANALYSIS ---
print("\n" + "="*95)
print("Q3: LATERALIZED CATEGORIES - Comparison to Controls' Preferred Hemisphere")
print("    Face: Controls' Right (preferred) vs Left (non-preferred)")
print("    Word: Controls' Left (preferred) vs Right (non-preferred)")
print("="*95)
print(f"{'Measure':<12} {'Comparison':<25} {'OTC':>6} {'Ctrl':>6} {'Diff':>7}  {'95% CI':<17}  {'P(d)':>5}  {'n':>8}  Sig")
print("-"*95)

for col, name in MEASURES.items():
    for cat in ['Face', 'Word']:
        pref_hemi = CATEGORY_PREFERRED_HEMI[cat]
        nonpref_hemi = 'L' if pref_hemi == 'R' else 'R'

        # Get OTC patients for this category; NaNs are dropped exactly as
        # they are for the control samples below, so both sides of the
        # comparison are treated the same way.
        otc_data = df_clean[(df_clean['Group'] == 'OTC') & (df_clean['Category'] == cat)][col].dropna().values

        if len(otc_data) == 0:
            continue

        # Get controls by hemisphere (from original df, not averaged)
        ctrl_pref = get_controls_by_hemisphere(df, cat, pref_hemi)[col].dropna().values
        ctrl_nonpref = get_controls_by_hemisphere(df, cat, nonpref_hemi)[col].dropna().values

        # OTC vs Controls' preferred hemisphere
        if len(ctrl_pref) > 0:
            diff, low, high, prob = bootstrap_analysis(otc_data, ctrl_pref)
            comp_label = f"{cat} vs Ctrl-{pref_hemi}(pref)"
            print(format_result(name, comp_label, np.mean(otc_data), np.mean(ctrl_pref),
                               diff, low, high, prob, len(otc_data), len(ctrl_pref)))

        # OTC vs Controls' non-preferred hemisphere
        if len(ctrl_nonpref) > 0:
            diff, low, high, prob = bootstrap_analysis(otc_data, ctrl_nonpref)
            comp_label = f"{cat} vs Ctrl-{nonpref_hemi}(non-pref)"
            print(format_result(name, comp_label, np.mean(otc_data), np.mean(ctrl_nonpref),
                               diff, low, high, prob, len(otc_data), len(ctrl_nonpref)))
    print()


# --- INTERPRETATION KEY ---
print("\n" + "="*95)
print("INTERPRETATION KEY")
print("="*95)
print("""
* = 95% CI excludes zero (significant)
P(d) = Probability of direction (Bayesian-like confidence in sign of effect)

Q3 Interpretation (Ayzenberg framework):
  - OTC similar to Ctrl-preferred: TRUE REORGANIZATION (patient's intact hemi matches typical dominant hemi)
  - OTC similar to Ctrl-non-preferred only: UPREGULATION (patient shows typical non-dominant response)
  - OTC worse than both: IMPAIRED REORGANIZATION
""")

Data loaded: 132 rows

Q1: INSTABILITY GAP  [Mean(House,Object) - Mean(Face,Word)]
Measure      Comparison              OTC    Ref    Diff  95% CI              P(d)         n  Sig
-----------------------------------------------------------------------------------------------
Selectivity  vs Control            0.275  0.109   0.166  [ 0.019,  0.337]  0.988  n=6,9  *
Selectivity  vs nonOTC             0.275  0.008   0.267  [ 0.132,  0.428]  1.000  n=6,9  *
Geometry     vs Control           -0.289 -0.096  -0.193  [-0.340, -0.043]  0.994  n=6,9  *
Geometry     vs nonOTC            -0.289 -0.036  -0.253  [-0.404, -0.103]  1.000  n=6,9  *
Spatial Drift vs Control           -5.535 -0.344  -5.191  [-13.376,  2.799]  0.893  n=6,9  
Spatial Drift vs nonOTC            -5.535 -1.826  -3.709  [-11.513,  3.960]  0.821  n=6,9  
MDS          vs Control           -0.023  0.050  -0.073  [-0.142, -0.006]  0.984  n=6,9  *
MDS          vs nonOTC            -0.023  0.078  -0.101  [-0.178, -0.025]  0.996  n=6

In [42]:
import pandas as pd
import numpy as np

# ============================================================
# 1. CONFIGURATION & DATA LOADING
# ============================================================
FILE_PATH = '/user_data/csimmon2/git_repos/long_pt/results_final.csv'

# Measure column name -> short label used in the printed tables.
MEASURES = {
    'Selectivity_Change': 'Selectivity',
    'Geometry_Preservation_6mm': 'Geometry',
    'Spatial_Relocation_mm': 'Spatial Drift',
    'MDS_Global': 'MDS',
}

# Category lateralization (for Ayzenberg-style analysis):
# category -> hemisphere treated as preferred, or None when bilateral.
CATEGORY_PREFERRED_HEMI = {
    'Face': 'R',     # Faces typically right-lateralized
    'Word': 'L',     # Words typically left-lateralized
    'House': None,   # Bilateral - no preferred
    'Object': None,  # Bilateral - no preferred
}

# Load Data; the stray 'Unnamed: 22' column is removed when present and
# ignored when absent.
df = pd.read_csv(FILE_PATH).drop(columns=['Unnamed: 22'], errors='ignore')

# Compute MDS_Global: row-wise average across all category MDS columns
# (Nordt-style).
df = df.assign(
    MDS_Global=df[['MDS_Face', 'MDS_House', 'MDS_Object', 'MDS_Word']].mean(axis=1)
)

print(f"Data loaded: {len(df)} rows")

# ============================================================
# 2. DATA PROCESSING FUNCTIONS
# ============================================================

def process_data_avg_controls(df, measures):
    """
    Collapse control rows to a single averaged row per Subject per Category
    and stack them beneath the unchanged patient rows.

    Args:
        df: frame with 'Subject', 'Group', 'Category', 'Category_Type',
            'age_1', 'yr_gap' and every column named in `measures`.
        measures: list of measure column names to carry through.

    Returns:
        DataFrame with columns Subject, Group, Category, Category_Type,
        the measures, age_1 and yr_gap; patients first, averaged controls
        after, re-indexed from 0.
    """
    keys = ['Subject', 'Category']
    value_cols = measures + ['age_1', 'yr_gap']
    out_cols = ['Subject', 'Group', 'Category', 'Category_Type'] + value_cols

    is_control = df['Group'] == 'control'
    ctrl_rows = df[is_control].copy()
    patient_rows = df[~is_control].copy()

    per_subject = ctrl_rows.groupby(keys)
    # Numeric columns are averaged over the control rows of each group;
    # the label columns come from the group's first row.
    averaged = per_subject[value_cols].mean().reset_index()
    labels = per_subject[['Group', 'Category_Type']].first().reset_index()
    ctrl_final = averaged.merge(labels, on=keys)

    return pd.concat(
        [patient_rows[out_cols], ctrl_final[out_cols]],
        ignore_index=True,
    )


def get_controls_by_hemisphere(df, category, hemisphere):
    """
    Return the control rows of `df` for one category and one hemisphere.

    A row is kept when its 'Group' is 'control', its 'Category' equals
    `category` and its 'nonpt_hemi' equals `hemisphere`. All columns and
    the original index labels are preserved.
    """
    is_control = df['Group'] == 'control'
    in_category = df['Category'] == category
    in_hemisphere = df['nonpt_hemi'] == hemisphere
    return df[is_control & in_category & in_hemisphere]


def compute_instability_gap(df, measure):
    """
    Calculates Mean(Bilateral) - Mean(Unilateral) per subject.

    Rows are limited to the Face/House/Object/Word categories; the
    Bilateral/Unilateral label of each row is read from 'Category_Type'.

    Args:
        df: frame with 'Subject', 'Group', 'Category', 'Category_Type' and
            the `measure` column.
        measure: name of the column to average.

    Returns:
        DataFrame with one row per (Subject, Group) holding the per-type
        means and their difference in 'Gap'; subjects lacking either type
        are dropped. When one of the two types is absent from the data
        altogether, an empty frame that still carries the 'Subject',
        'Group', 'Bilateral', 'Unilateral' and 'Gap' columns is returned,
        so callers can filter on 'Group' without a KeyError.
    """
    core = df[df['Category'].isin(['Face', 'House', 'Object', 'Word'])]
    subj_means = core.groupby(['Subject', 'Group', 'Category_Type'])[measure].mean().reset_index()
    pivot = subj_means.pivot(index=['Subject', 'Group'], columns='Category_Type', values=measure).reset_index()

    if 'Bilateral' not in pivot.columns or 'Unilateral' not in pivot.columns:
        # Keep the schema on the empty result; a bare pd.DataFrame() has no
        # 'Group'/'Gap' columns and breaks downstream column lookups.
        return pd.DataFrame(columns=['Subject', 'Group', 'Bilateral', 'Unilateral', 'Gap'])

    pivot['Gap'] = pivot['Bilateral'] - pivot['Unilateral']
    return pivot.dropna(subset=['Gap'])

# ============================================================
# 3. STATISTICAL ENGINE (BOOTSTRAP)
# ============================================================

def bootstrap_analysis(group_a, group_b, n_boot=100000, seed=42):
    """
    Non-parametric bootstrap comparison.

    Each group is resampled with replacement at its own size, `n_boot`
    times, and the difference of resampled means (A - B) is collected.

    Args:
        group_a, group_b: 1-D array-likes of observations (non-empty).
        n_boot: number of bootstrap replicates.
        seed: seed of the private random stream used for resampling.

    Returns: observed diff, 95% CI bounds (2.5th / 97.5th percentiles of
    the bootstrap differences), probability of direction (fraction of
    bootstrap differences above zero when the observed diff is positive,
    otherwise the fraction below zero).
    """
    # A private RandomState produces exactly the draws that
    # np.random.seed(seed) + np.random.choice would, so reported numbers are
    # unchanged, but the global NumPy RNG is no longer reseeded as a side
    # effect of every call.
    rng = np.random.RandomState(seed)
    n_a, n_b = len(group_a), len(group_b)

    obs_diff = np.mean(group_a) - np.mean(group_b)

    # One row per bootstrap replicate.
    sample_a = rng.choice(group_a, size=(n_boot, n_a), replace=True)
    sample_b = rng.choice(group_b, size=(n_boot, n_b), replace=True)
    boot_diffs = np.mean(sample_a, axis=1) - np.mean(sample_b, axis=1)

    ci_low, ci_high = np.percentile(boot_diffs, [2.5, 97.5])

    prob = np.mean(boot_diffs > 0) if obs_diff > 0 else np.mean(boot_diffs < 0)

    return obs_diff, ci_low, ci_high, prob


def format_result(name, comparison, mean_a, mean_b, diff, ci_low, ci_high, prob, n_a, n_b):
    """
    Render one comparison as a fixed-width table row.

    The row holds the measure name, comparison label, both group means,
    their difference, the bracketed CI, the probability of direction, the
    two sample sizes and a trailing "*" when both CI bounds lie strictly
    on the same side of zero (empty otherwise).
    """
    bounds = (ci_low, ci_high)
    same_side = all(b > 0 for b in bounds) or all(b < 0 for b in bounds)
    marker = "*" if same_side else ""

    # Leading columns are separated by one space, trailing ones by two.
    head = " ".join([
        f"{name:<12}",
        f"{comparison:<20}",
        f"{mean_a:>6.3f}",
        f"{mean_b:>6.3f}",
        f"{diff:>7.3f}",
    ])
    interval = f"[{ci_low:>6.3f}, {ci_high:>6.3f}]"
    return "  ".join([head, interval, f"{prob:.3f}", f"n={n_a},{n_b}", marker])

# ============================================================
# 4. RUN ANALYSES
# ============================================================

# Process data (averaged controls)
df_clean = process_data_avg_controls(df, list(MEASURES.keys()))

# --- Q1: INSTABILITY GAP ---
# Per-subject gap, bootstrapped for OTC patients against controls and
# against nonOTC patients, once per measure.
print("\n" + "="*95)
print("Q1: INSTABILITY GAP  [Mean(House,Object) - Mean(Face,Word)]")
print("="*95)
print(f"{'Measure':<12} {'Comparison':<20} {'OTC':>6} {'Ref':>6} {'Diff':>7}  {'95% CI':<17}  {'P(d)':>5}  {'n':>8}  Sig")
print("-"*95)

for col, name in MEASURES.items():
    gaps = compute_instability_gap(df_clean, col)

    # compute_instability_gap may hand back an empty frame without a
    # 'Group' column; skip the measure instead of failing on the lookup.
    if gaps.empty:
        continue

    otc = gaps[gaps['Group'] == 'OTC']['Gap'].values
    ctrl = gaps[gaps['Group'] == 'control']['Gap'].values
    nonotc = gaps[gaps['Group'] == 'nonOTC']['Gap'].values

    for comp_name, comp_data in [('vs Control', ctrl), ('vs nonOTC', nonotc)]:
        if len(otc) == 0 or len(comp_data) == 0:
            continue
        diff, low, high, prob = bootstrap_analysis(otc, comp_data)
        print(format_result(name, comp_name, np.mean(otc), np.mean(comp_data),
                           diff, low, high, prob, len(otc), len(comp_data)))


# --- Q2: CATEGORY SPECIFICITY ---
print("\n" + "="*95)
print("Q2: CATEGORY SPECIFICITY (OTC vs nonOTC)")
print("="*95)
print(f"{'Measure':<12} {'Category':<20} {'OTC':>6} {'Ref':>6} {'Diff':>7}  {'95% CI':<17}  {'P(d)':>5}  {'n':>8}  Sig")
print("-"*95)

for col, name in MEASURES.items():
    for cat in ['Face', 'Word', 'Object', 'House']:
        df_cat = df_clean[df_clean['Category'] == cat]

        # Drop missing values first: a single NaN would turn the means and
        # the whole bootstrap interval into NaN, and would inflate n.
        otc = df_cat[df_cat['Group'] == 'OTC'][col].dropna().values
        nonotc = df_cat[df_cat['Group'] == 'nonOTC'][col].dropna().values

        if len(otc) == 0 or len(nonotc) == 0:
            continue

        diff, low, high, prob = bootstrap_analysis(otc, nonotc)
        print(format_result(name, cat, np.mean(otc), np.mean(nonotc),
                           diff, low, high, prob, len(otc), len(nonotc)))
    print()


# --- Q3: AYZENBERG-STYLE HEMISPHERE ANALYSIS ---
print("\n" + "="*95)
print("Q3: LATERALIZED CATEGORIES - Comparison to Controls' Preferred Hemisphere")
print("    Face: Controls' Right (preferred) vs Left (non-preferred)")
print("    Word: Controls' Left (preferred) vs Right (non-preferred)")
print("="*95)
print(f"{'Measure':<12} {'Comparison':<25} {'OTC':>6} {'Ctrl':>6} {'Diff':>7}  {'95% CI':<17}  {'P(d)':>5}  {'n':>8}  Sig")
print("-"*95)

for col, name in MEASURES.items():
    for cat in ['Face', 'Word']:
        pref_hemi = CATEGORY_PREFERRED_HEMI[cat]
        nonpref_hemi = 'L' if pref_hemi == 'R' else 'R'

        # Get OTC patients for this category; NaNs are dropped exactly as
        # they are for the control samples below, so both sides of the
        # comparison are treated the same way.
        otc_data = df_clean[(df_clean['Group'] == 'OTC') & (df_clean['Category'] == cat)][col].dropna().values

        if len(otc_data) == 0:
            continue

        # Get controls by hemisphere (from original df, not averaged)
        ctrl_pref = get_controls_by_hemisphere(df, cat, pref_hemi)[col].dropna().values
        ctrl_nonpref = get_controls_by_hemisphere(df, cat, nonpref_hemi)[col].dropna().values

        # OTC vs Controls' preferred hemisphere
        if len(ctrl_pref) > 0:
            diff, low, high, prob = bootstrap_analysis(otc_data, ctrl_pref)
            comp_label = f"{cat} vs Ctrl-{pref_hemi}(pref)"
            print(format_result(name, comp_label, np.mean(otc_data), np.mean(ctrl_pref),
                               diff, low, high, prob, len(otc_data), len(ctrl_pref)))

        # OTC vs Controls' non-preferred hemisphere
        if len(ctrl_nonpref) > 0:
            diff, low, high, prob = bootstrap_analysis(otc_data, ctrl_nonpref)
            comp_label = f"{cat} vs Ctrl-{nonpref_hemi}(non-pref)"
            print(format_result(name, comp_label, np.mean(otc_data), np.mean(ctrl_nonpref),
                               diff, low, high, prob, len(otc_data), len(ctrl_nonpref)))
    print()


# --- INTERPRETATION KEY ---
print("\n" + "="*95)
print("INTERPRETATION KEY")
print("="*95)
print("""
* = 95% CI excludes zero (significant)
P(d) = Probability of direction (Bayesian-like confidence in sign of effect)

Q3 Interpretation (Ayzenberg framework):
  - OTC similar to Ctrl-preferred: TRUE REORGANIZATION (patient's intact hemi matches typical dominant hemi)
  - OTC similar to Ctrl-non-preferred only: UPREGULATION (patient shows typical non-dominant response)
  - OTC worse than both: IMPAIRED REORGANIZATION
""")

Data loaded: 132 rows

Q1: INSTABILITY GAP  [Mean(House,Object) - Mean(Face,Word)]
Measure      Comparison              OTC    Ref    Diff  95% CI              P(d)         n  Sig
-----------------------------------------------------------------------------------------------
Selectivity  vs Control            0.275  0.109   0.166  [ 0.019,  0.337]  0.988  n=6,9  *
Selectivity  vs nonOTC             0.275  0.008   0.267  [ 0.132,  0.428]  1.000  n=6,9  *
Geometry     vs Control           -0.289 -0.096  -0.193  [-0.340, -0.043]  0.994  n=6,9  *
Geometry     vs nonOTC            -0.289 -0.036  -0.253  [-0.404, -0.103]  1.000  n=6,9  *
Spatial Drift vs Control           -5.535 -0.344  -5.191  [-13.376,  2.799]  0.893  n=6,9  
Spatial Drift vs nonOTC            -5.535 -1.826  -3.709  [-11.513,  3.960]  0.821  n=6,9  
MDS          vs Control            0.021  0.049  -0.027  [-0.104,  0.053]  0.751  n=6,9  
MDS          vs nonOTC             0.021  0.028  -0.007  [-0.099,  0.086]  0.559  n=6,