# SPM Tutorial #8: Group Analysis

## Sections
- Specifying the 2nd-Level Analysis
- Estimating the 2nd-Level Analysis
- Viewing the Results
- 2nd-Level Results for Incongruent and Congruent

## Notes
- Ensure all subjects have complete first-level outputs

In [None]:
import numpy as np
import pandas as pd

def get_subject_list(dataset_path="ds000102", pattern="sub-*"):
    """Return the sorted names of the entries in ``dataset_path`` matching ``pattern``.

    Args:
        dataset_path: Directory whose direct children are searched.
        pattern: Glob pattern applied inside ``dataset_path``.

    Returns:
        A list holding the final path component of every match, ordered by
        the sorted full paths. The list is empty when nothing matches.
    """
    import glob
    from pathlib import Path

    matches = sorted(glob.glob(f"{dataset_path}/{pattern}"))
    # Path(...).name honours the platform's path separators and ignores a
    # trailing one; splitting on a literal '/' returned '' for matches that
    # end in a separator and the whole path on Windows.
    return [Path(match).name for match in matches]

def load_subject_data(subject_id, run_id=1, dataset_path="ds000102"):
    """Read the flanker events table for one subject and run.

    Args:
        subject_id: Subject folder name, also used as the file-name prefix.
        run_id: Run number inserted into the file names.
        dataset_path: Root directory of the dataset.

    Returns:
        A dict with keys ``'subject'``, ``'run'``, ``'events'`` (the parsed
        tab-separated events file) and ``'bold_file'`` (the path string of
        the matching BOLD image; its existence is not checked), or ``None``
        when the events file does not exist.
    """
    # Both files share everything up to the final suffix.
    run_prefix = f"{dataset_path}/{subject_id}/func/{subject_id}_task-flanker_run-{run_id}"
    try:
        events_table = pd.read_csv(run_prefix + "_events.tsv", sep="\t")
    except FileNotFoundError:
        return None
    return {
        'subject': subject_id,
        'run': run_id,
        'events': events_table,
        'bold_file': run_prefix + "_bold.nii.gz",
    }

def calculate_behavioral_metrics(events_df):
    """Summarise accuracy and response times of one events table.

    Args:
        events_df: DataFrame with the columns ``'correctness'``,
            ``'response_time'`` and ``'Stimulus'``.

    Returns:
        A dict with the trial count, the fraction of rows whose
        ``'correctness'`` equals ``'correct'``, the mean and standard
        deviation of the response time, the mean response time for the
        ``'congruent'`` and ``'incongruent'`` stimuli, and their difference
        (incongruent minus congruent) under ``'flanker_effect'``.
    """
    n_total = len(events_df)
    n_correct = (events_df['correctness'] == 'correct').sum()

    rt = events_df['response_time']
    stimulus = events_df['Stimulus']
    congruent_mean = rt[stimulus == 'congruent'].mean()
    incongruent_mean = rt[stimulus == 'incongruent'].mean()

    return {
        'n_trials': n_total,
        'accuracy': n_correct / n_total,
        'mean_rt': rt.mean(),
        'std_rt': rt.std(),
        'congruent_rt': congruent_mean,
        'incongruent_rt': incongruent_mean,
        'flanker_effect': incongruent_mean - congruent_mean,
    }

print("\n" + "=" * 70)
print("TUTORIAL #8: SECOND-LEVEL (GROUP) ANALYSIS")
print("=" * 70)

print("\n1. PREPARING CONTRAST IMAGES")
print("-" * 70)

# Each subject contributes one value: the behavioural flanker effect of
# run 1 (incongruent RT minus congruent RT) multiplied by a fixed factor of 10.
group_contrasts = []
subject_ids_analysis = []
subject_list = get_subject_list()

for subject_id in subject_list[:10]:  # only the first 10 subjects are used
    data_subj = load_subject_data(subject_id, run_id=1)
    if data_subj is None:
        # No events file for this subject/run.
        continue
    metrics = calculate_behavioral_metrics(data_subj['events'])
    contrast_val = metrics['flanker_effect'] * 10
    if not np.isfinite(contrast_val):
        # A subject without trials in one condition has an undefined effect;
        # keeping the NaN would turn every group statistic below into NaN.
        print(f"  Skipping {subject_id}: flanker effect is undefined")
        continue
    # The two lists are appended together so they stay index-aligned.
    subject_ids_analysis.append(subject_id)
    group_contrasts.append(contrast_val)

if not group_contrasts:
    # Fail with an explicit message instead of numpy's zero-size-array error.
    raise ValueError(
        "No usable subject data found; check the dataset path and that the "
        "run-1 events files exist."
    )

group_contrasts = np.array(group_contrasts)
n_subjects = len(group_contrasts)

print(f"Subjects included: {n_subjects}")
print("Contrasts prepared: Incongruent > Congruent")
print("Contrast distribution:")
print(f"  Mean: {group_contrasts.mean():.4f}")
# Sample standard deviation (ddof=1) so it is consistent with the standard
# error used by the t-test on these values.
print(f"  Std: {group_contrasts.std(ddof=1):.4f}")
print(f"  Min: {group_contrasts.min():.4f}")
print(f"  Max: {group_contrasts.max():.4f}")

print("\n2. ONE-SAMPLE T-TEST")
print("-" * 70)

from scipy.stats import sem, ttest_1samp
from scipy.stats import t as t_dist

# Test whether the mean of the per-subject contrasts differs from 0.
t_stat, p_val = ttest_1samp(group_contrasts, 0)
df = n_subjects - 1
se = sem(group_contrasts)
# The confidence interval uses the Student-t critical value for `df` degrees
# of freedom; the normal value 1.96 is too narrow for a group this small and
# would disagree with the t-test reported alongside it.
t_crit = t_dist.ppf(0.975, df)
ci_lower = group_contrasts.mean() - t_crit * se
ci_upper = group_contrasts.mean() + t_crit * se

print("One-sample t-test (testing if mean ≠ 0):")
print(f"  Mean: {group_contrasts.mean():.4f}")
print(f"  SE: {se:.4f}")
print(f"  95% CI: [{ci_lower:.4f}, {ci_upper:.4f}]")
print(f"  t({df}) = {t_stat:.4f}")
print(f"  p-value = {p_val:.6f}")
if p_val < 0.05:
    print("  ✓ Significant at p < 0.05 (reject null hypothesis)")
else:
    print("  ✗ Not significant at p < 0.05")

# Effect size relative to the sample standard deviation (ddof=1), the same
# estimator that underlies `se` and `t_stat`.
cohens_d = group_contrasts.mean() / group_contrasts.std(ddof=1)
print(f"  Cohen's d: {cohens_d:.4f}")

print("\n3. MULTIPLE COMPARISON CORRECTION")
print("-" * 70)

# Number of tests used for the correction (presumably a 64 x 64 x 40 voxel
# grid; confirm against the actual image dimensions).
n_voxels = 64 * 64 * 40
alpha = 0.05
# Bonferroni: divide the significance level by the number of tests.
bonferroni_threshold = alpha / n_voxels
print(
    "Multiple comparison correction:\n"
    f"  Number of voxels: {n_voxels:,}\n"
    f"  Bonferroni threshold (α={alpha}): {bonferroni_threshold:.2e}"
)

from statsmodels.stats.multitest import multipletests

# The p-values are simulated (random Beta(2, 10) draws, one per voxel), not
# taken from data; Benjamini-Hochberg FDR is then applied to them.
simulated_pvals = np.random.beta(2, 10, size=n_voxels)
reject_fdr, pvals_fdr, _, _ = multipletests(
    simulated_pvals,
    alpha=0.05,
    method='fdr_bh',
)
n_sig_fdr = reject_fdr.sum()
print(f"  FDR-corrected (q<0.05): {n_sig_fdr:,} voxels significant")

print("\n✓ Group analysis simulation complete")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

## Group Analysis Visualization
# One figure with four panels (2 x 2); each section below picks its panel
# from `axes`.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(16, 10))
fig.suptitle(
    'Second-Level (Group) Analysis: Statistical Inference',
    fontsize=14,
    fontweight='bold',
)

# Top-left panel: histogram of the per-subject contrast values with the
# group mean and the null value (0) drawn as dashed vertical lines.
ax = axes[0, 0]
contrast_mean = group_contrasts.mean()
ax.hist(group_contrasts, bins=10, color='steelblue', alpha=0.7, edgecolor='black')
for line_x, line_color, line_label in (
    (contrast_mean, 'red', f'Mean = {contrast_mean:.4f}'),
    (0, 'green', 'Null (0)'),
):
    ax.axvline(line_x, color=line_color, linestyle='--', linewidth=2.5, label=line_label)
ax.set_title(f'Distribution of Subject Contrasts (N={n_subjects})', fontweight='bold')
ax.set_xlabel('Contrast Value', fontweight='bold')
ax.set_ylabel('Frequency (Number of Subjects)', fontweight='bold')
ax.legend()
ax.grid(alpha=0.3, axis='y')

# Top-right panel: one horizontal bar per subject, green for a positive
# contrast and red otherwise, with the group mean as a dashed line.
ax = axes[0, 1]
subjects_short = [subject.replace('sub-', '') for subject in subject_ids_analysis]
y_pos = np.arange(len(group_contrasts))
colors = np.where(group_contrasts > 0, 'green', 'red').tolist()
ax.barh(y_pos, group_contrasts, color=colors, alpha=0.7, edgecolor='black')
ax.axvline(
    group_contrasts.mean(),
    color='blue',
    linestyle='--',
    linewidth=2.5,
    label='Group Mean',
)
# Thin solid line marking zero.
ax.axvline(0, color='black', linestyle='-', linewidth=1)
ax.set_yticks(y_pos)
ax.set_yticklabels(subjects_short, fontsize=9)
ax.set_title('Individual Subject Contrasts', fontweight='bold')
ax.set_xlabel('Contrast Value', fontweight='bold')
ax.legend()
ax.grid(alpha=0.3, axis='x')

# Bottom-left panel: Student-t density for the test's degrees of freedom,
# with the observed statistic and the rejection tails marked.
ax = axes[1, 0]
from scipy.stats import t as t_dist
df = n_subjects - 1
t_abs = abs(t_stat)
# Widen the plotted range when the observed statistic lies outside the
# default [-5, 5] window, so the curve reaches the marker lines.
t_limit = max(5.0, t_abs + 1.0) if np.isfinite(t_abs) else 5.0
t_range = np.linspace(-t_limit, t_limit, 200)
t_pdf = t_dist.pdf(t_range, df)
ax.plot(t_range, t_pdf, 'b-', linewidth=2, label='t-distribution')
# p_val comes from a two-sided test, so the area it represents is the sum of
# both tails beyond ±|t|; shade both rather than only the upper one.
upper_tail = t_range > t_abs
lower_tail = t_range < -t_abs
ax.fill_between(t_range[upper_tail], 0, t_pdf[upper_tail],
                alpha=0.3, color='red', label=f'p-value = {p_val:.4f}')
ax.fill_between(t_range[lower_tail], 0, t_pdf[lower_tail],
                alpha=0.3, color='red')
ax.axvline(t_stat, color='red', linestyle='--', linewidth=2.5, label=f't = {t_stat:.4f}')
ax.axvline(-t_stat, color='red', linestyle='--', linewidth=2.5)
ax.set_xlabel('t-value', fontweight='bold')
ax.set_ylabel('Probability Density', fontweight='bold')
ax.set_title(f'One-Sample t-Test (df={df})', fontweight='bold')
ax.legend()
ax.grid(alpha=0.3)

# Bottom-right panel: uncorrected vs. Bonferroni-corrected thresholds on a
# -log10 scale for three significance levels.
ax = axes[1, 1]
alpha_levels = [0.05, 0.01, 0.001]
threshold_bonf = [-np.log10(level / n_voxels) for level in alpha_levels]
threshold_uncorr = [-np.log10(level) for level in alpha_levels]

# Grouped bars: each pair is centred on its tick, half a bar width to each side.
x = np.arange(len(alpha_levels))
width = 0.35

bars1 = ax.bar(
    x - width/2, threshold_uncorr, width,
    label='Uncorrected', alpha=0.7, color='blue', edgecolor='black',
)
bars2 = ax.bar(
    x + width/2, threshold_bonf, width,
    label=f'Bonferroni (n={n_voxels:,})', alpha=0.7, color='red', edgecolor='black',
)

ax.set_title('Multiple Comparison Correction Thresholds', fontweight='bold')
ax.set_ylabel('-log10(p-value)', fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels([f'α={level}' for level in alpha_levels])
ax.legend()
ax.grid(alpha=0.3, axis='y')

# Write each bar's height just above it, centred horizontally.
for bars in (bars1, bars2):
    for bar in bars:
        height = bar.get_height()
        label_x = bar.get_x() + bar.get_width()/2.
        ax.text(label_x, height, f'{height:.1f}',
                ha='center', va='bottom', fontsize=9)

# Lay out, save to disk, then display the figure.
plt.tight_layout()
plt.savefig('group_analysis_results.png', dpi=100, bbox_inches='tight')
plt.show()

print("✓ Group Analysis Visualization Complete")