<a href="https://colab.research.google.com/github/grabuffo/BrainStim_ANN_fMRI_HCP/blob/main/notebooks/Validate_TMS_fMRI_ANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Validate TMS-fMRI Simulations with Population ANN

Comprehensive validation of synthetic TMS-fMRI data by comparing empirical vs. simulated connectivity patterns.

## Validation Strategy

### i) **Target-Seed Validation** (per target region)
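For each stimulated target region, compute the seed-based FC vector (with the target as seed) under rest and under stimulation, plus their difference (delta = stim − rest), and correlate each empirical vector with its simulated counterpart.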

### ii) **All-Seeds Validation** (per target region, repeated for all 450 seeds)
For each target region and each possible seed (450 ROIs), compute seed-FC correlations. Summarize:
- Mean correlation for each seed
- Statistical significance (p-value) for each seed
- **Percentage of significant seeds** (how many seeds have r significantly > 0?)

### iii) **Full FC Validation** (entire correlation matrices)
Compare entire static FC and delta-FC matrices between empirical and simulated across all conditions.

## Setup

In [None]:
# Colab-only: mount Google Drive at /content/drive so the dataset paths
# defined later in the notebook resolve.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Standard library, then the numeric / statistics stack used throughout.
import os, sys, pickle, json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, ttest_1samp

print("✓ Imports successful")

## Define Paths

In [None]:
# Root of the project data on the mounted Drive.
BASE = "/content/drive/MyDrive/Colab Notebooks/Brain_Stim_ANN/data"

# Empirical input and the preprocessing root that hosts the output folder.
PREPROC_ROOT = os.path.join(BASE, "preprocessed_subjects_tms_fmri")
DATASET_EMP_PKL = os.path.join(BASE, "TMS_fMRI", "dataset_tian50_schaefer400_allruns.pkl")

# Output folder: holds the simulated dataset (read) and the validation JSON (written).
OUT_DIR = os.path.join(PREPROC_ROOT, "ANN_vs_tms_fmri")
os.makedirs(OUT_DIR, exist_ok=True)

DATASET_SIM_PKL = os.path.join(OUT_DIR, "dataset_simulated_populationANN.pkl")
VALIDATION_RESULTS_JSON = os.path.join(OUT_DIR, "validation_results_comprehensive.json")

# Echo the resolved locations so a wrong mount point is visible immediately.
print(f"✓ Empirical dataset: {DATASET_EMP_PKL}")
print(f"✓ Simulated dataset: {DATASET_SIM_PKL}")
print(f"✓ Output: {VALIDATION_RESULTS_JSON}")

## Load Datasets

In [None]:
# NOTE(review): pickle.load executes arbitrary code embedded in the file;
# only load pickles produced by this project.

# Empirical dataset
print("Loading empirical dataset...")
with open(DATASET_EMP_PKL, "rb") as emp_handle:
    dataset_emp = pickle.load(emp_handle)
n_emp = len(dataset_emp)
print(f"✓ Loaded {n_emp} subjects (empirical)")

# Simulated dataset
print("\nLoading simulated dataset...")
with open(DATASET_SIM_PKL, "rb") as sim_handle:
    dataset_sim = pickle.load(sim_handle)
n_sim = len(dataset_sim)
print(f"✓ Loaded {n_sim} subjects (simulated)")

# Both datasets must contain exactly the same subject keys.
emp_ids = set(dataset_emp.keys())
sim_ids = set(dataset_sim.keys())
assert emp_ids == sim_ids, "Subject mismatch!"
print("\n✓ Subjects match across datasets")

## Helper Functions

In [None]:
def safe_target_idx(target_vec):
    """Extract the target region index from a one-hot vector.

    Args:
        target_vec: array-like target indicator (any shape; it is flattened),
            or None.

    Returns:
        The flat index (int) of the single entry equal to 1, or None when
        ``target_vec`` is None, empty, or not strictly one-hot after casting
        to int.
    """
    if target_vec is None:
        return None
    v = np.asarray(target_vec).astype(int).ravel()
    if v.size == 0:
        return None
    # A sum of 1 alone is not enough: [2, -1, 0] also sums to 1. Requiring a
    # single non-zero entry as well guarantees that entry is exactly 1.
    if (v != 0).sum() != 1 or v.sum() != 1:
        return None
    return int(np.argmax(v))

def seed_based_fc(ts, seed_idx, all_regions=True, exclude_seed=False):
    """Compute seed-based FC: Pearson correlation of the seed with every region.

    Args:
        ts: (T, N) time series, rows are time points and columns are regions.
        seed_idx: column index of the seed region in the full ``ts``.
        all_regions: if True use every column; if False drop the first 50
            columns (Tian subcortex) and keep the remaining (Schaefer) ones,
            shifting ``seed_idx`` by 50 accordingly.
        exclude_seed: if True remove the seed's own entry (its
            self-correlation) from the result. Defaults to False, which keeps
            the historical output of one value per region.

    Returns:
        (N,) float64 vector of correlations between the seed and each retained
        region, or (N-1,) when ``exclude_seed`` is True. Regions (or a seed)
        with zero variance yield NaN. Returns None when the (shifted) seed
        index falls outside the retained columns.
    """
    if not all_regions:
        ts = ts[:, 50:]  # Skip Tian 50, keep Schaefer 400
        seed_idx = seed_idx - 50  # Adjust seed index

    if seed_idx < 0 or seed_idx >= ts.shape[1]:
        return None

    # Correlate the seed column against all columns directly instead of
    # building the full (N+1, N+1) correlation matrix and reading one row.
    data = np.asarray(ts, dtype=np.float64)
    centered = data - data.mean(axis=0, keepdims=True)
    seed_centered = centered[:, seed_idx]
    norms = np.sqrt((centered ** 2).sum(axis=0))  # (N,) per-region L2 norm
    cross = seed_centered @ centered  # (N,) un-normalised covariance
    with np.errstate(divide="ignore", invalid="ignore"):
        fc_seed = cross / (norms[seed_idx] * norms)
    # Guard against rounding pushing values marginally outside [-1, 1].
    fc_seed = np.clip(fc_seed, -1.0, 1.0)

    if exclude_seed:
        fc_seed = fc_seed[np.arange(fc_seed.size) != seed_idx]
    return fc_seed

def compute_fc_matrix(ts):
    """Return the column-by-column Pearson correlation matrix of ``ts`` as float32.

    ``ts`` is transposed before the call so that each column of the input is
    treated as one variable.
    """
    by_region = ts.T
    fc = np.corrcoef(by_region)
    return fc.astype(np.float32)

def upper_triangle_vec(mat, k=1):
    """Flatten the upper triangle of ``mat`` (from diagonal offset ``k``) into 1D.

    Indices are generated for a square matrix of side ``mat.shape[0]``.
    """
    rows, cols = np.triu_indices(mat.shape[0], k=k)
    return mat[rows, cols]

print("✓ Helper functions defined")

---
# Part I: Target-Seed Validation

For each target region, validate using only that region as the seed.

In [None]:
print("="*70)
print("PART I: TARGET-SEED VALIDATION")
print("="*70)
print("\nFor each target region, compute seed-FC correlations (empirical vs simulated)")
print("Conditions: REST, STIM, DELTA (stim - rest)\n")

# Results keyed by target: target_key -> {'target_idx': int, 'correlations': [dict per stim run]}
target_seed_results = {}

for sub_id in sorted(dataset_emp.keys()):
    if sub_id not in dataset_sim:
        continue

    sub_emp = dataset_emp[sub_id]
    sub_sim = dataset_sim[sub_id]

    # --- REST condition (baseline) ---
    # Keep only runs whose "time series" is an ndarray with at least 450 columns.
    rest_emp_list = []
    rest_sim_list = []

    for run in sub_emp.get("task-rest", {}).values():
        ts = run.get("time series")
        if isinstance(ts, np.ndarray) and ts.shape[1] >= 450:
            rest_emp_list.append(ts)

    for run in sub_sim.get("task-rest", {}).values():
        ts = run.get("time series")
        if isinstance(ts, np.ndarray) and ts.shape[1] >= 450:
            rest_sim_list.append(ts)

    if not rest_emp_list or not rest_sim_list:
        continue

    # Concatenate rest runs along time (one long rest series per subject).
    rest_emp = np.concatenate(rest_emp_list, axis=0)
    rest_sim = np.concatenate(rest_sim_list, axis=0)

    # --- STIM condition ---
    stim_runs_emp = sub_emp.get("task-stim", {})
    stim_runs_sim = sub_sim.get("task-stim", {})

    if not stim_runs_emp or not stim_runs_sim:
        continue

    # The rest seed-FC depends only on (subject, target), not on the stim run,
    # so compute it once per target instead of once per run.
    rest_seed_fc_cache = {}

    # Process each stim run that exists in both datasets under the same key.
    for run_idx, run_emp in stim_runs_emp.items():
        run_sim = stim_runs_sim.get(run_idx)
        if run_sim is None:
            continue

        ts_emp = run_emp.get("time series")
        ts_sim = run_sim.get("time series")
        target_vec = run_emp.get("target")

        if not isinstance(ts_emp, np.ndarray) or not isinstance(ts_sim, np.ndarray):
            continue
        if ts_emp.shape[1] < 450 or ts_sim.shape[1] < 450:
            continue

        target_idx = safe_target_idx(target_vec)
        if target_idx is None:
            continue

        # Key: target region index
        target_key = f"target_{target_idx:03d}"

        if target_key not in target_seed_results:
            target_seed_results[target_key] = {
                'target_idx': target_idx,
                'correlations': []
            }

        # Seed-based FC using the stimulated target as seed.
        if target_idx not in rest_seed_fc_cache:
            rest_seed_fc_cache[target_idx] = (
                seed_based_fc(rest_emp, target_idx, all_regions=True),
                seed_based_fc(rest_sim, target_idx, all_regions=True),
            )
        fc_seed_rest_emp, fc_seed_rest_sim = rest_seed_fc_cache[target_idx]

        fc_seed_stim_emp = seed_based_fc(ts_emp, target_idx, all_regions=True)
        fc_seed_stim_sim = seed_based_fc(ts_sim, target_idx, all_regions=True)

        # seed_based_fc returns None for an out-of-range seed; check all four
        # vectors, not only the empirical ones.
        seed_vectors = (fc_seed_rest_emp, fc_seed_rest_sim,
                        fc_seed_stim_emp, fc_seed_stim_sim)
        if any(vec is None for vec in seed_vectors):
            continue

        # Correlate seed-FC vectors (empirical vs simulated)
        r_rest = np.corrcoef(fc_seed_rest_emp, fc_seed_rest_sim)[0, 1]
        r_stim = np.corrcoef(fc_seed_stim_emp, fc_seed_stim_sim)[0, 1]

        # Delta: stimulation-induced change
        delta_emp = fc_seed_stim_emp - fc_seed_rest_emp
        delta_sim = fc_seed_stim_sim - fc_seed_rest_sim
        r_delta = np.corrcoef(delta_emp, delta_sim)[0, 1]

        target_seed_results[target_key]['correlations'].append({
            'subject': sub_id,
            'r_rest': float(r_rest),
            'r_stim': float(r_stim),
            'r_delta': float(r_delta),
        })

print(f"\n✓ Computed target-seed correlations for {len(target_seed_results)} target regions")
print("\nPer-target summary (first 5 targets):")
for target_key in sorted(target_seed_results.keys())[:5]:
    n_corrs = len(target_seed_results[target_key]['correlations'])
    print(f"  {target_key}: {n_corrs} observations")

### Part I Results: Target-Seed Summary

In [None]:
print("\n" + "="*70)
print("TARGET-SEED VALIDATION: SUMMARY")
print("="*70 + "\n")

# Finite correlations pooled over all targets, one list per condition.
summary_by_condition = {
    'rest': [],
    'stim': [],
    'delta': []
}

# One row per target for the printed table.
target_summary_table = []

for target_key in sorted(target_seed_results.keys()):
    corrs = target_seed_results[target_key]['correlations']

    # Drop NaN/inf correlations (e.g. from zero-variance vectors) per condition.
    r_rest_vals = np.array([c['r_rest'] for c in corrs if np.isfinite(c['r_rest'])])
    r_stim_vals = np.array([c['r_stim'] for c in corrs if np.isfinite(c['r_stim'])])
    r_delta_vals = np.array([c['r_delta'] for c in corrs if np.isfinite(c['r_delta'])])

    summary_by_condition['rest'].extend(r_rest_vals)
    summary_by_condition['stim'].extend(r_stim_vals)
    summary_by_condition['delta'].extend(r_delta_vals)

    target_summary_table.append({
        'Target': target_key,
        'N': len(corrs),  # all observations, including non-finite ones
        'Mean_r_rest': r_rest_vals.mean() if len(r_rest_vals) > 0 else np.nan,
        'Mean_r_stim': r_stim_vals.mean() if len(r_stim_vals) > 0 else np.nan,
        'Mean_r_delta': r_delta_vals.mean() if len(r_delta_vals) > 0 else np.nan,
        'Std_r_delta': r_delta_vals.std() if len(r_delta_vals) > 0 else np.nan,
    })

df_targets = pd.DataFrame(target_summary_table)
print(df_targets.to_string(index=False))

print("\n" + "="*70)
print("POOLED ACROSS ALL TARGETS")
print("="*70 + "\n")

for condition in ['rest', 'stim', 'delta']:
    data = np.array(summary_by_condition[condition], dtype=float)
    data = data[np.isfinite(data)]

    print(f"\n{condition.upper()} condition:")
    print(f"  N observations: {len(data)}")

    # min()/max() raise on an empty array, so report and move on instead.
    if len(data) == 0:
        print("  No finite correlations available")
        continue

    # Two-sided one-sample t-test of the pooled correlations against 0.
    # A single observation has no variance estimate, so skip the test.
    t_stat, t_p = ttest_1samp(data, 0) if len(data) > 1 else (np.nan, np.nan)

    print(f"  Mean r: {data.mean():.4f} ± {data.std():.4f}")
    print(f"  Range: [{data.min():.4f}, {data.max():.4f}]")
    print(f"  t-test p-value: {t_p:.6f}")
    print(f"  Significant? {'YES' if t_p < 0.05 else 'NO'}")

---
# Part II: All-Seeds Validation

For each target region, repeat the seed-based analysis using each of the 450 regions in turn as the seed. Calculate:
- Correlation for each seed
- Significance (p-value) for each seed
- **% of seeds with significant correlation**

In [None]:
print("\n" + "="*70)
print("PART II: ALL-SEEDS VALIDATION (PER TARGET REGION)")
print("="*70)
print("\nFor each target region, compute seed-FC for all 450 seeds.")
print("Calculate significance and % significant seeds.\n")

# target_key -> {'target_idx': int, 'seed_results': {seed_key: [dict per stim run]}}
all_seeds_results = {}

N_SEEDS = 450  # number of seed regions evaluated per stim run

# Compute global rest FC (across all subjects/runs)
print("Computing global rest FC matrices...")
rest_emp_all = []
rest_sim_all = []

for sub_id in sorted(dataset_emp.keys()):
    # Skip subjects without simulated data instead of failing on the lookup.
    if sub_id not in dataset_sim:
        continue

    for run in dataset_emp[sub_id].get("task-rest", {}).values():
        ts = run.get("time series")
        if isinstance(ts, np.ndarray) and ts.shape[1] >= 450:
            rest_emp_all.append(ts)

    for run in dataset_sim[sub_id].get("task-rest", {}).values():
        ts = run.get("time series")
        if isinstance(ts, np.ndarray) and ts.shape[1] >= 450:
            rest_sim_all.append(ts)

if not rest_emp_all or not rest_sim_all:
    raise ValueError("No usable rest runs found in the empirical and/or simulated dataset")

rest_emp_concat = np.concatenate(rest_emp_all, axis=0)
rest_sim_concat = np.concatenate(rest_sim_all, axis=0)
print(f"  Empirical rest: {rest_emp_concat.shape}")
print(f"  Simulated rest: {rest_sim_concat.shape}")

# The rest FC does not depend on the stim run. Compute the full matrices once;
# row s of a matrix is the seed-based FC vector of seed s (self-correlation
# included), so nothing has to be recomputed inside the run loop.
rest_fc_emp_global = np.corrcoef(rest_emp_concat.T)
rest_fc_sim_global = np.corrcoef(rest_sim_concat.T)
r_rest_by_seed = [
    float(np.corrcoef(rest_fc_emp_global[s], rest_fc_sim_global[s])[0, 1])
    for s in range(N_SEEDS)
]

# --- Now iterate over targets ---
for sub_id in sorted(dataset_emp.keys()):
    if sub_id not in dataset_sim:
        continue

    stim_runs_emp = dataset_emp[sub_id].get("task-stim", {})
    stim_runs_sim = dataset_sim[sub_id].get("task-stim", {})

    for run_idx, run_emp in stim_runs_emp.items():
        run_sim = stim_runs_sim.get(run_idx)
        if run_sim is None:
            continue

        ts_emp = run_emp.get("time series")
        ts_sim = run_sim.get("time series")
        target_vec = run_emp.get("target")

        if not isinstance(ts_emp, np.ndarray) or not isinstance(ts_sim, np.ndarray):
            continue
        if ts_emp.shape[1] < 450 or ts_sim.shape[1] < 450:
            continue

        target_idx = safe_target_idx(target_vec)
        if target_idx is None:
            continue

        target_key = f"target_{target_idx:03d}"

        if target_key not in all_seeds_results:
            all_seeds_results[target_key] = {
                'target_idx': target_idx,
                'seed_results': {}
            }
        seed_results = all_seeds_results[target_key]['seed_results']

        # One full FC matrix per run replaces 450 separate seed computations.
        stim_fc_emp_run = np.corrcoef(ts_emp.T)
        stim_fc_sim_run = np.corrcoef(ts_sim.T)

        # For each of 450 seeds, compute correlation
        for seed_idx in range(N_SEEDS):
            fc_seed_rest_emp = rest_fc_emp_global[seed_idx]
            fc_seed_rest_sim = rest_fc_sim_global[seed_idx]
            fc_seed_stim_emp = stim_fc_emp_run[seed_idx]
            fc_seed_stim_sim = stim_fc_sim_run[seed_idx]

            # REST correlation (run-independent, precomputed above)
            r_rest = r_rest_by_seed[seed_idx]

            # STIM correlation
            r_stim = np.corrcoef(fc_seed_stim_emp, fc_seed_stim_sim)[0, 1]

            # DELTA correlation
            delta_emp = fc_seed_stim_emp - fc_seed_rest_emp
            delta_sim = fc_seed_stim_sim - fc_seed_rest_sim
            r_delta = np.corrcoef(delta_emp, delta_sim)[0, 1]

            seed_key = f"seed_{seed_idx:03d}"
            seed_results.setdefault(seed_key, []).append({
                'subject': sub_id,
                'r_rest': float(r_rest),
                'r_stim': float(r_stim),
                'r_delta': float(r_delta),
            })

print(f"\n✓ Computed all-seeds correlations for {len(all_seeds_results)} target regions")

print(f"\n✓ Computed all-seeds correlations for {len(all_seeds_results)} target regions")

### Part II Results: Per-Target Statistics with % Significant Seeds

In [None]:
print("\n" + "="*70)
print("ALL-SEEDS VALIDATION: RESULTS PER TARGET REGION")
print("="*70 + "\n")

# Column order of the per-target table; passed explicitly so the columns exist
# even when there are no targets to summarise.
SEED_SUMMARY_COLUMNS = [
    'Target', 'N_seeds', 'Mean_r_delta', 'Std_r_delta',
    'Sig_seeds_count', 'Pct_significant',
]

target_seeds_summary = []

for target_key in sorted(all_seeds_results.keys()):
    target_data = all_seeds_results[target_key]
    seed_results_dict = target_data['seed_results']

    all_r_delta = []
    significant_count = 0
    total_seeds = 0

    for seed_key in sorted(seed_results_dict.keys()):
        seed_corrs = seed_results_dict[seed_key]

        # Pool correlations across subjects/runs for this seed
        r_delta_vals = np.array([c['r_delta'] for c in seed_corrs if np.isfinite(c['r_delta'])])

        if len(r_delta_vals) == 0:
            continue

        all_r_delta.extend(r_delta_vals)
        total_seeds += 1

        # A t-test needs at least two observations.
        if len(r_delta_vals) < 2:
            continue

        # Test if mean r_delta is significantly > 0. ttest_1samp is two-sided,
        # so also require a positive t statistic; otherwise seeds whose
        # correlation is significantly NEGATIVE would be counted as support.
        # NOTE(review): no multiple-comparison correction is applied across seeds.
        t_stat, p_val = ttest_1samp(r_delta_vals, 0)
        if t_stat > 0 and p_val < 0.05:
            significant_count += 1

    pct_significant = (significant_count / total_seeds * 100) if total_seeds > 0 else 0
    all_r_delta = np.array(all_r_delta)

    target_seeds_summary.append({
        'Target': target_key,
        'N_seeds': total_seeds,
        'Mean_r_delta': all_r_delta.mean() if len(all_r_delta) > 0 else np.nan,
        'Std_r_delta': all_r_delta.std() if len(all_r_delta) > 0 else np.nan,
        'Sig_seeds_count': significant_count,
        'Pct_significant': pct_significant,
    })

df_seeds_summary = pd.DataFrame(target_seeds_summary, columns=SEED_SUMMARY_COLUMNS)
print(df_seeds_summary.to_string(index=False))

print("\n" + "="*70)
print("OVERALL STATISTICS (across all targets)")
print("="*70 + "\n")

# Across-target aggregates; these are reused by the final summary and save cells.
mean_pct_sig = df_seeds_summary['Pct_significant'].mean()
std_pct_sig = df_seeds_summary['Pct_significant'].std()
mean_r_delta_all = df_seeds_summary['Mean_r_delta'].mean()

print(f"Mean % significant seeds: {mean_pct_sig:.1f}% ± {std_pct_sig:.1f}%")
print(f"Mean delta-FC correlation: {mean_r_delta_all:.4f}")
print(f"\n→ On average, {mean_pct_sig:.0f}% of seeds show SIGNIFICANT seed-FC correlation")

---
# Part III: Full FC Validation

Compare entire FC and delta-FC matrices between empirical and simulated.

In [None]:
print("\n" + "="*70)
print("PART III: FULL FC MATRIX VALIDATION")
print("="*70)
print("\nCompare entire FC and ΔFC matrices (empirical vs simulated)\n")


def _collect_valid_runs(runs):
    """Return the "time series" arrays of all runs that are ndarrays with >= 450 columns."""
    kept = []
    for run in runs.values():
        series = run.get("time series")
        if isinstance(series, np.ndarray) and series.shape[1] >= 450:
            kept.append(series)
    return kept


def _upper_triangle_r(mat_a, mat_b):
    """Pearson r between the strict upper triangles (k=1) of two matrices."""
    vec_a = upper_triangle_vec(mat_a, k=1)
    vec_b = upper_triangle_vec(mat_b, k=1)
    return pearsonr(vec_a, vec_b)[0]


# One empirical-vs-simulated correlation per subject and condition.
fc_matrix_results = {
    'rest_fc': [],
    'stim_fc': [],
    'delta_fc': []
}

for sub_id in sorted(dataset_emp.keys()):
    if sub_id not in dataset_sim:
        continue

    subject_emp = dataset_emp[sub_id]
    subject_sim = dataset_sim[sub_id]

    # --- REST FC ---
    rest_emp_list = _collect_valid_runs(subject_emp.get("task-rest", {}))
    rest_sim_list = _collect_valid_runs(subject_sim.get("task-rest", {}))

    # A subject needs usable rest data on both sides to contribute at all.
    if not (rest_emp_list and rest_sim_list):
        continue

    # Runs are joined along the time axis before computing FC.
    rest_emp = np.concatenate(rest_emp_list, axis=0)
    rest_sim = np.concatenate(rest_sim_list, axis=0)

    fc_rest_emp = compute_fc_matrix(rest_emp)
    fc_rest_sim = compute_fc_matrix(rest_sim)

    fc_matrix_results['rest_fc'].append(_upper_triangle_r(fc_rest_emp, fc_rest_sim))

    # --- STIM and DELTA FC ---
    stim_runs_emp = subject_emp.get("task-stim", {})
    stim_runs_sim = subject_sim.get("task-stim", {})

    if not stim_runs_emp or not stim_runs_sim:
        continue

    stim_emp_list = _collect_valid_runs(stim_runs_emp)
    stim_sim_list = _collect_valid_runs(stim_runs_sim)

    if not (stim_emp_list and stim_sim_list):
        continue

    stim_emp = np.concatenate(stim_emp_list, axis=0)
    stim_sim = np.concatenate(stim_sim_list, axis=0)

    fc_stim_emp = compute_fc_matrix(stim_emp)
    fc_stim_sim = compute_fc_matrix(stim_sim)

    fc_matrix_results['stim_fc'].append(_upper_triangle_r(fc_stim_emp, fc_stim_sim))

    # DELTA FC: stimulation FC minus rest FC, per dataset.
    delta_fc_emp = fc_stim_emp - fc_rest_emp
    delta_fc_sim = fc_stim_sim - fc_rest_sim

    fc_matrix_results['delta_fc'].append(_upper_triangle_r(delta_fc_emp, delta_fc_sim))

print(f"✓ Computed full FC correlations for {len(fc_matrix_results['rest_fc'])} subjects")

### Part III Results: Full FC Matrix Summary

In [None]:
print("\n" + "="*70)
print("FULL FC MATRIX VALIDATION: RESULTS")
print("="*70 + "\n")

# One summary row per condition that has at least one finite correlation.
fc_summary = []

for condition in ('rest_fc', 'stim_fc', 'delta_fc'):
    values = np.array(fc_matrix_results[condition])
    finite_values = values[np.isfinite(values)]

    if len(finite_values) == 0:
        continue

    # Two-sided one-sample t-test of the per-subject correlations against 0.
    p_value = ttest_1samp(finite_values, 0)[1]
    verdict = 'YES' if p_value < 0.05 else 'NO'
    label = condition.replace('_fc', '').upper()

    row = {
        'Condition': label,
        'N_subjects': len(finite_values),
        'Mean_r': finite_values.mean(),
        'Std_r': finite_values.std(),
        'Min_r': finite_values.min(),
        'Max_r': finite_values.max(),
        'p_value': p_value,
        'Significant': verdict,
    }
    fc_summary.append(row)

df_fc_summary = pd.DataFrame(fc_summary)
print(df_fc_summary.to_string(index=False))

print("\nInterpretation:")
for line in (
    "  REST FC correlation: baseline connectivity similarity",
    "  STIM FC correlation: connectivity during stimulation",
    "  DELTA FC correlation: stimulation-induced FC changes",
):
    print(line)

---
# Final Summary & Conclusions

In [None]:
print("\n" + "="*70)
print("COMPREHENSIVE VALIDATION SUMMARY")
print("="*70)

print("""
## VALIDATION RESULTS

### Part I: Target-Seed Validation
- Uses only the stimulated target region as seed
- Indicates: Does the model capture TMS effects on target connectivity?
""")

# Finite pooled target-seed correlations per condition (from the Part I summary).
rest_data = np.array(summary_by_condition['rest'])[np.isfinite(summary_by_condition['rest'])]
stim_data = np.array(summary_by_condition['stim'])[np.isfinite(summary_by_condition['stim'])]
delta_data = np.array(summary_by_condition['delta'])[np.isfinite(summary_by_condition['delta'])]

print(f"  REST:  mean r = {rest_data.mean():.4f}, p = {ttest_1samp(rest_data, 0)[1]:.6f}")
print(f"  STIM:  mean r = {stim_data.mean():.4f}, p = {ttest_1samp(stim_data, 0)[1]:.6f}")
print(f"  DELTA: mean r = {delta_data.mean():.4f}, p = {ttest_1samp(delta_data, 0)[1]:.6f}")

print("""
### Part II: All-Seeds Validation
- Repeats analysis for all 450 possible seeds
- Indicates: What % of brain seeds show significant correlation?
""")
print(f"  Average % significant seeds per target: {mean_pct_sig:.1f}% ± {std_pct_sig:.1f}%")
print(f"  → Interpretation: {mean_pct_sig:.0f}% of seed regions significantly capture")
print(f"     empirical-vs-simulated correlations")

print("""
### Part III: Full FC Matrix Validation
- Compares entire correlation matrices (not just seeds)
- Indicates: Overall FC pattern similarity
""")
rest_fc_data = np.array(fc_matrix_results['rest_fc'])[np.isfinite(fc_matrix_results['rest_fc'])]
delta_fc_data = np.array(fc_matrix_results['delta_fc'])[np.isfinite(fc_matrix_results['delta_fc'])]

if len(rest_fc_data) > 0:
    print(f"  REST FC:  mean r = {rest_fc_data.mean():.4f}, p = {ttest_1samp(rest_fc_data, 0)[1]:.6f}")
if len(delta_fc_data) > 0:
    print(f"  DELTA FC: mean r = {delta_fc_data.mean():.4f}, p = {ttest_1samp(delta_fc_data, 0)[1]:.6f}")

print("\n" + "="*70)
print("INTERPRETATION: IS SIMULATED TMS RELATED TO EMPIRICAL TMS?")
print("="*70)

# Decision logic.
# The t-test is two-sided, so a significantly NEGATIVE mean correlation would
# also give p < 0.05. Only a positive mean supports the model, hence the extra
# mean > 0 requirement. At least two observations are needed for the test.
delta_sig = bool(
    len(delta_data) > 1
    and delta_data.mean() > 0
    and ttest_1samp(delta_data, 0)[1] < 0.05
)
delta_fc_sig = bool(
    len(delta_fc_data) > 1
    and delta_fc_data.mean() > 0
    and ttest_1samp(delta_fc_data, 0)[1] < 0.05
)
pct_seeds_high = mean_pct_sig > 30  # If >30% of seeds are significant

print("\nCriteria for validation:")
print(f"  1. Target-seed DELTA significant (p < 0.05, mean r > 0): {'✓ YES' if delta_sig else '✗ NO'}")
print(f"  2. Full FC DELTA significant (p < 0.05, mean r > 0): {'✓ YES' if delta_fc_sig else '✗ NO'}")
print(f"  3. >30% of seeds significant: {'✓ YES' if pct_seeds_high else '✗ NO'}")

if delta_sig and (delta_fc_sig or pct_seeds_high):
    print("""
╔════════════════════════════════════════════════════════════════╗
║ ✓ STRONG VALIDATION: YES, simulated TMS effects ARE related   ║
║ to empirical TMS effects                                       ║
║                                                                ║
║ The model successfully captures TMS-induced connectivity       ║
║ changes both at target-seed and distributed network levels.   ║
╚════════════════════════════════════════════════════════════════╝
""")
elif delta_sig:
    print("""
╔════════════════════════════════════════════════════════════════╗
║ ⚠ MODERATE VALIDATION: Partial support for model validity     ║
║                                                                ║
║ Target-seed effects are captured, but full FC patterns show   ║
║ limited correlation. The model may capture local effects      ║
║ but miss distributed network reorganization.                  ║
╚════════════════════════════════════════════════════════════════╝
""")
else:
    print("""
╔════════════════════════════════════════════════════════════════╗
║ ✗ LIMITED VALIDATION: Simulated effects NOT significantly     ║
║ related to empirical effects                                   ║
║                                                                ║
║ The population model may be insufficient for capturing        ║
║ TMS-specific effects. Subject-specific or hierarchical        ║
║ models may be needed.                                          ║
╚════════════════════════════════════════════════════════════════╝
""")

print("\n" + "="*70)

## Save Results

In [None]:
# Compile all results
comprehensive_results = {
    'metadata': {
        'validation_type': 'Comprehensive TMS-fMRI ANN Validation',
        'timestamp': str(pd.Timestamp.now()),
    },
    'part_i_target_seed': {
        'rest_mean_r': float(rest_data.mean()),
        'rest_p_value': float(ttest_1samp(rest_data, 0)[1]),
        'stim_mean_r': float(stim_data.mean()),
        'stim_p_value': float(ttest_1samp(stim_data, 0)[1]),
        'delta_mean_r': float(delta_data.mean()),
        'delta_p_value': float(ttest_1samp(delta_data, 0)[1]),
        'n_observations': int(len(delta_data)),
    },
    'part_ii_all_seeds': {
        'mean_pct_significant_seeds': float(mean_pct_sig),
        'std_pct_significant_seeds': float(std_pct_sig),
        'n_targets': int(len(df_seeds_summary)),
        'per_target_summary': df_seeds_summary.to_dict('records'),
    },
    'part_iii_full_fc': df_fc_summary.to_dict('records'),
}

with open(VALIDATION_RESULTS_JSON, 'w') as f:
    json.dump(comprehensive_results, f, indent=2)

print(f"✓ Saved comprehensive validation results to:")
print(f"  {VALIDATION_RESULTS_JSON}")