In [6]:
"""
Crawford Single-Case Statistics (FIXED)
Compare each OTC patient to controls
"""

import numpy as np
import pandas as pd
from scipy import stats

# Load data
results_csv = '/user_data/csimmon2/git_repos/long_pt/results_final_corrected.csv'
df = pd.read_csv(results_csv)

def crawford_t_test(patient_score, control_scores):
    """
    Crawford & Howell (1998) modified t-test for single case
    """
    # Remove NaN values
    control_scores = control_scores[~np.isnan(control_scores)]
    
    n = len(control_scores)
    if n < 2:
        return np.nan, np.nan
        
    control_mean = np.mean(control_scores)
    control_sd = np.std(control_scores, ddof=1)
    
    if control_sd == 0:
        return np.nan, np.nan
    
    t = (patient_score - control_mean) / (control_sd * np.sqrt((n + 1) / n))
    p = 2 * stats.t.sf(abs(t), df=n-1)  # two-tailed
    
    return t, p

# Debug: Check for NaN values
controls = df[df['Group'] == 'control']
print("DEBUG: Checking for NaN values in controls")
print("-" * 50)

for cat_type in ['Bilateral', 'Unilateral']:
    subset = controls[controls['Category_Type'] == cat_type]['Selectivity_Change']
    n_total = len(subset)
    n_nan = subset.isna().sum()
    n_valid = n_total - n_nan
    print(f"{cat_type}: {n_valid} valid, {n_nan} NaN, {n_total} total")

print("-" * 50)

# Get control scores (excluding NaN)
ctrl_bilateral = controls[controls['Category_Type'] == 'Bilateral']['Selectivity_Change'].dropna().values
ctrl_unilateral = controls[controls['Category_Type'] == 'Unilateral']['Selectivity_Change'].dropna().values

# Get OTC patients
otc = df[df['Group'] == 'OTC']
otc_subjects = otc['Subject'].unique()

print("\nCRAWFORD SINGLE-CASE STATISTICS")
print("=" * 70)
print(f"Control bilateral selectivity: {np.mean(ctrl_bilateral):.3f} ± {np.std(ctrl_bilateral):.3f} (n={len(ctrl_bilateral)})")
print("\nOTC Patients vs Controls (Bilateral Selectivity Change):")
print("-" * 70)
print(f"{'Subject':<12} {'Score':<10} {'Crawford t':<12} {'p-value':<10} {'Sig?'}")
print("-" * 70)

for subject in sorted(otc_subjects):
    subj_data = otc[(otc['Subject'] == subject) & (otc['Category_Type'] == 'Bilateral')]
    if len(subj_data) > 0:
        patient_score = subj_data['Selectivity_Change'].mean()
        t, p = crawford_t_test(patient_score, ctrl_bilateral)
        sig = "**" if p < 0.01 else "*" if p < 0.05 else ""
        print(f"{subject:<12} {patient_score:<10.3f} {t:<12.2f} {p:<10.3f} {sig}")

print("\n" + "=" * 70)
print(f"Control unilateral selectivity: {np.mean(ctrl_unilateral):.3f} ± {np.std(ctrl_unilateral):.3f} (n={len(ctrl_unilateral)})")
print("\nOTC Patients vs Controls (Unilateral Selectivity Change):")
print("-" * 70)
print(f"{'Subject':<12} {'Score':<10} {'Crawford t':<12} {'p-value':<10} {'Sig?'}")
print("-" * 70)

for subject in sorted(otc_subjects):
    subj_data = otc[(otc['Subject'] == subject) & (otc['Category_Type'] == 'Unilateral')]
    if len(subj_data) > 0:
        patient_score = subj_data['Selectivity_Change'].mean()
        t, p = crawford_t_test(patient_score, ctrl_unilateral)
        sig = "**" if p < 0.01 else "*" if p < 0.05 else ""
        print(f"{subject:<12} {patient_score:<10.3f} {t:<12.2f} {p:<10.3f} {sig}")

# Summary comparison
print("\n" + "=" * 70)
print("SUMMARY: Bilateral-Unilateral Gap")
print("=" * 70)
print(f"\n{'Subject':<12} {'Bilateral':<10} {'Unilateral':<10} {'Gap':<10} {'Bil vs Ctrl':<12} {'Uni vs Ctrl'}")
print("-" * 70)

for subject in sorted(otc_subjects):
    subj_data = otc[otc['Subject'] == subject]
    bil = subj_data[subj_data['Category_Type'] == 'Bilateral']['Selectivity_Change'].mean()
    uni = subj_data[subj_data['Category_Type'] == 'Unilateral']['Selectivity_Change'].mean()
    
    t_bil, p_bil = crawford_t_test(bil, ctrl_bilateral)
    t_uni, p_uni = crawford_t_test(uni, ctrl_unilateral)
    
    sig_bil = "*" if p_bil < 0.05 else ""
    sig_uni = "*" if p_uni < 0.05 else ""
    
    print(f"{subject:<12} {bil:<10.3f} {uni:<10.3f} {bil-uni:+.3f}     p={p_bil:.3f}{sig_bil:<4} p={p_uni:.3f}{sig_uni}")

DEBUG: Checking for NaN values in controls
--------------------------------------------------
Bilateral: 36 valid, 0 NaN, 36 total
Unilateral: 35 valid, 1 NaN, 36 total
--------------------------------------------------

CRAWFORD SINGLE-CASE STATISTICS
Control bilateral selectivity: 0.259 ± 0.167 (n=36)

OTC Patients vs Controls (Bilateral Selectivity Change):
----------------------------------------------------------------------
Subject      Score      Crawford t   p-value    Sig?
----------------------------------------------------------------------
OTC004       0.293      0.20         0.846      
OTC008       0.392      0.77         0.446      
OTC010       0.282      0.13         0.896      
OTC017       0.568      1.80         0.081      
OTC021       0.307      0.28         0.780      
OTC079       0.671      2.40         0.022      *

Control unilateral selectivity: 0.153 ± 0.132 (n=35)

OTC Patients vs Controls (Unilateral Selectivity Change):
----------------------------------

In [3]:
"""
Surgery Side Analysis: Left vs Right Resection
"""

import numpy as np
import pandas as pd
from scipy import stats

# Load data
results_csv = '/user_data/csimmon2/git_repos/long_pt/results_final_corrected.csv'
df = pd.read_csv(results_csv)

# Define surgery sides (from your subject info)
LEFT_RESECTION = ['OTC010', 'OTC017', 'OTC021', 'OTC079']   # Intact RH, n=4
RIGHT_RESECTION = ['OTC004', 'OTC008']                       # Intact LH, n=2

# Filter OTC only
otc = df[df['Group'] == 'OTC'].copy()

# Add surgery side column
otc['Surgery_Side'] = otc['Subject'].apply(
    lambda x: 'Left' if x in LEFT_RESECTION else 'Right' if x in RIGHT_RESECTION else 'Unknown'
)

print("SURGERY SIDE ANALYSIS")
print("=" * 70)
print(f"Left resection (intact RH):  {LEFT_RESECTION} (n={len(LEFT_RESECTION)})")
print(f"Right resection (intact LH): {RIGHT_RESECTION} (n={len(RIGHT_RESECTION)})")

# Selectivity Change by surgery side and category type
print("\n" + "=" * 70)
print("SELECTIVITY CHANGE BY SURGERY SIDE")
print("=" * 70)

for surgery in ['Left', 'Right']:
    print(f"\n{surgery.upper()} RESECTION:")
    side_data = otc[otc['Surgery_Side'] == surgery]
    
    bil = side_data[side_data['Category_Type'] == 'Bilateral']['Selectivity_Change']
    uni = side_data[side_data['Category_Type'] == 'Unilateral']['Selectivity_Change']
    
    print(f"  Bilateral:   {bil.mean():.3f} ± {bil.std():.3f} (n={len(bil)})")
    print(f"  Unilateral:  {uni.mean():.3f} ± {uni.std():.3f} (n={len(uni)})")
    print(f"  Difference:  {bil.mean() - uni.mean():+.3f}")

# Category-specific by surgery side
print("\n" + "=" * 70)
print("CATEGORY-SPECIFIC BY SURGERY SIDE")
print("=" * 70)

print(f"\n{'Surgery':<10} {'Category':<10} {'Mean':<10} {'SD':<10} {'n'}")
print("-" * 50)

for surgery in ['Left', 'Right']:
    side_data = otc[otc['Surgery_Side'] == surgery]
    for cat in ['Face', 'Word', 'Object', 'House']:
        cat_data = side_data[side_data['Category'] == cat]['Selectivity_Change']
        if len(cat_data) > 0:
            print(f"{surgery:<10} {cat:<10} {cat_data.mean():<10.3f} {cat_data.std():<10.3f} {len(cat_data)}")

# Spatial drift by surgery side
if 'Spatial_Relocation_mm' in df.columns:
    print("\n" + "=" * 70)
    print("SPATIAL DRIFT BY SURGERY SIDE AND CATEGORY")
    print("=" * 70)
    print("\nKey prediction: Face drifts more in RIGHT resection; Word drifts more in LEFT resection")
    
    print(f"\n{'Surgery':<10} {'Category':<10} {'Drift (mm)':<12} {'SD':<10}")
    print("-" * 45)
    
    for surgery in ['Left', 'Right']:
        side_data = otc[otc['Surgery_Side'] == surgery]
        for cat in ['Face', 'Word', 'Object', 'House']:
            cat_data = side_data[side_data['Category'] == cat]['Spatial_Relocation_mm']
            if len(cat_data) > 0:
                print(f"{surgery:<10} {cat:<10} {cat_data.mean():<12.1f} {cat_data.std():<10.1f}")

# Individual subject breakdown
print("\n" + "=" * 70)
print("INDIVIDUAL SUBJECT BREAKDOWN")
print("=" * 70)

print(f"\n{'Subject':<12} {'Surgery':<8} {'Bil Sel':<10} {'Uni Sel':<10} {'Diff':<10}")
print("-" * 55)

for subject in sorted(otc['Subject'].unique()):
    subj_data = otc[otc['Subject'] == subject]
    surgery = subj_data['Surgery_Side'].iloc[0]
    
    bil = subj_data[subj_data['Category_Type'] == 'Bilateral']['Selectivity_Change'].mean()
    uni = subj_data[subj_data['Category_Type'] == 'Unilateral']['Selectivity_Change'].mean()
    
    print(f"{subject:<12} {surgery:<8} {bil:<10.3f} {uni:<10.3f} {bil-uni:+.3f}")

SURGERY SIDE ANALYSIS
Left resection (intact RH):  ['OTC010', 'OTC017', 'OTC021', 'OTC079'] (n=4)
Right resection (intact LH): ['OTC004', 'OTC008'] (n=2)

SELECTIVITY CHANGE BY SURGERY SIDE

LEFT RESECTION:
  Bilateral:   0.457 ± 0.246 (n=8)
  Unilateral:  0.141 ± 0.124 (n=8)
  Difference:  +0.317

RIGHT RESECTION:
  Bilateral:   0.342 ± 0.336 (n=4)
  Unilateral:  0.150 ± 0.070 (n=4)
  Difference:  +0.193

CATEGORY-SPECIFIC BY SURGERY SIDE

Surgery    Category   Mean       SD         n
--------------------------------------------------
Left       Face       0.171      0.137      4
Left       Word       0.110      0.122      4
Left       Object     0.523      0.332      4
Left       House      0.392      0.142      4
Right      Face       0.158      0.086      2
Right      Word       0.142      0.084      2
Right      Object     0.423      0.459      2
Right      House      0.261      0.320      2

SPATIAL DRIFT BY SURGERY SIDE AND CATEGORY

Key prediction: Face drifts more in RIGHT res

In [4]:
"""
Methods Summary for PI
"""

print("""
================================================================================
METHODS SUMMARY
================================================================================

SAMPLE:
  - OTC patients: n=6 (4 left resection, 2 right resection)
  - nonOTC patients: n=9
  - Controls: n=9
  - Total: n=24 subjects, 132 observations (4 categories × varying sessions)

MEASURES:
  1. Selectivity Change (Liu Distinctiveness)
     - Definition: |T2 - T1| of mean correlation between preferred and non-preferred categories
     - Higher = more change in category selectivity
     
  2. Geometry Preservation
     - Definition: Cross-temporal correlation of category response patterns in 6mm sphere
     - Lower = worse preservation of representational structure
     
  3. MDS Shift
     - Definition: Euclidean distance moved in multidimensional scaling space
     - Higher = more reorganization in representational space
     
  4. Spatial Drift
     - Definition: Physical distance (mm) between ROI centroids at T1 and T2
     - Higher = more spatial relocation

STATISTICAL APPROACH:
  - Bootstrap resampling: 100,000 iterations, subject-level resampling
  - Confidence intervals: Percentile method (2.5th, 97.5th)
  - P-values: Proportion of bootstrap samples crossing zero
  - Single-case: Crawford & Howell (1998) modified t-test
  
CATEGORY CLASSIFICATION:
  - Bilateral: Object, House (bilateral cortical representation)
  - Unilateral: Face (RH-lateralized), Word (LH-lateralized)

KEY CONTRASTS:
  - Q1: Between-group comparisons (OTC vs nonOTC vs Control)
  - Q2: Group × Category_Type interaction
  - Q3: Bilateral vs Unilateral within OTC (PRIMARY)
  - Q4: Category-level effects (Face vs Word vs Object vs House)
  - Q5: Covariate effects (Age, Scan Gap)
  - Q6: Measure independence (multicollinearity check)
================================================================================
""")


METHODS SUMMARY

SAMPLE:
  - OTC patients: n=6 (4 left resection, 2 right resection)
  - nonOTC patients: n=9
  - Controls: n=9
  - Total: n=24 subjects, 132 observations (4 categories × varying sessions)

MEASURES:
  1. Selectivity Change (Liu Distinctiveness)
     - Definition: |T2 - T1| of mean correlation between preferred and non-preferred categories
     - Higher = more change in category selectivity
     
  2. Geometry Preservation
     - Definition: Cross-temporal correlation of category response patterns in 6mm sphere
     - Lower = worse preservation of representational structure
     
  3. MDS Shift
     - Definition: Euclidean distance moved in multidimensional scaling space
     - Higher = more reorganization in representational space
     
  4. Spatial Drift
     - Definition: Physical distance (mm) between ROI centroids at T1 and T2
     - Higher = more spatial relocation

STATISTICAL APPROACH:
  - Bootstrap resampling: 100,000 iterations, subject-level resampling
  - C

## SPACE

In [15]:
# Identifying best subject drop to match OTC age and gap
import pandas as pd
import scipy.stats as stats
import numpy as np

# 1. Load Data
# Try relative path first, then absolute path
try:
    df = pd.read_csv('results_final_corrected.csv')
except FileNotFoundError:
    df = pd.read_csv('/user_data/csimmon2/git_repos/long_pt/results_final_corrected.csv')

# 2. Define Helper Function for Comparison
def print_justification(df, group_name, drop_subjects, target_df):
    """
    Computes demographics and t-test stats between a specific group and the target (OTC).
    """
    # Filter for the group
    group_data = df[df['Group'] == group_name].copy()
    
    # Exclude subjects if specified
    label = "Original"
    if drop_subjects:
        label = f"Dropped ({', '.join([s.replace(group_name, '') for s in drop_subjects])})"
        group_data = group_data[~group_data['Subject'].isin(drop_subjects)]
    
    # Get unique subject stats (Age and Gap)
    subj_df = group_data[['Subject', 'age_1', 'yr_gap']].drop_duplicates()
    target_subj_df = target_df[['Subject', 'age_1', 'yr_gap']].drop_duplicates()
    
    # Calculate Means
    age_mean = subj_df['age_1'].mean()
    gap_mean = subj_df['yr_gap'].mean()
    target_age = target_subj_df['age_1'].mean()
    target_gap = target_subj_df['yr_gap'].mean()
    
    # Run T-Tests (Independent)
    # Null Hypothesis: Group Mean == OTC Mean. 
    # High p-value (>0.05) = Good Match (No significant difference).
    t_age, p_age = stats.ttest_ind(subj_df['age_1'], target_subj_df['age_1'], nan_policy='omit')
    t_gap, p_gap = stats.ttest_ind(subj_df['yr_gap'], target_subj_df['yr_gap'], nan_policy='omit')
    
    # Print Results
    print(f"{group_name} [{label}]:")
    print(f"  N = {len(subj_df)}")
    print(f"  Age: {age_mean:.2f} (OTC: {target_age:.2f}) | p-val: {p_age:.3f}")
    print(f"  Gap: {gap_mean:.2f} (OTC: {target_gap:.2f}) | p-val: {p_gap:.3f}")
    print("-" * 45)

# 3. Execution
otc_df = df[df['Group'] == 'OTC']

print("--- Justification for Dropping Subjects ---\n")

# Control Group Justification
print(f"Target OTC (N={len(otc_df['Subject'].unique())})")
print("-" * 45)
print_justification(df, 'control', [], otc_df)
print_justification(df, 'control', ['control025', 'control027'], otc_df)

# nonOTC Group Justification
print_justification(df, 'nonOTC', [], otc_df)
print_justification(df, 'nonOTC', ['nonOTC045', 'nonOTC072'], otc_df)

--- Justification for Dropping Subjects ---

Target OTC (N=6)
---------------------------------------------
control [Original]:
  N = 9
  Age: 11.72 (OTC: 13.51) | p-val: 0.230
  Gap: 1.77 (OTC: 1.56) | p-val: 0.715
---------------------------------------------
control [Dropped (025, 027)]:
  N = 7
  Age: 12.66 (OTC: 13.51) | p-val: 0.545
  Gap: 1.63 (OTC: 1.56) | p-val: 0.915
---------------------------------------------
nonOTC [Original]:
  N = 9
  Age: 14.79 (OTC: 13.51) | p-val: 0.366
  Gap: 0.94 (OTC: 1.56) | p-val: 0.244
---------------------------------------------
nonOTC [Dropped (045, 072)]:
  N = 7
  Age: 14.16 (OTC: 13.51) | p-val: 0.650
  Gap: 0.97 (OTC: 1.56) | p-val: 0.323
---------------------------------------------
