In [8]:
# CELL 1: Imports
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import StratifiedShuffleSplit
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [10]:
# CELL 2: Paths
BASE_DIR = Path("/user_data/csimmon2/long_pt")
PATTERN_DIR = BASE_DIR / "results" / "patterns"  # From Cell 8 output
OUTPUT_DIR = BASE_DIR / "results" / "decoding"
RESULTS_DIR = Path("/user_data/csimmon2/git_repos/long_pt") / "results" / "decoding"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

SUBJECTS = {
    'sub-004': {'code': 'UD', 'sessions': ['01', '02', '03', '05', '06']},
    'sub-021': {'code': 'TC', 'sessions': ['01', '02', '03']}
}

print(f"Pattern directory: {PATTERN_DIR}")
print(f"Output directory: {OUTPUT_DIR}")

# Verify patterns exist
pattern_files = list(PATTERN_DIR.glob('*.npy'))
print(f"Found {len(pattern_files)} pattern files")

Pattern directory: /user_data/csimmon2/long_pt/results/patterns
Output directory: /user_data/csimmon2/long_pt/results/decoding
Found 0 pattern files


In [12]:
# Run this
import os
for root, dirs, files in os.walk('/user_data/csimmon2/long_pt'):
    npy_files = [f for f in files if f.endswith('.npy') and 'UD_ses' in f]
    if npy_files:
        print(f"Found {len(npy_files)} pattern files in: {root}")
        print(f"Example: {npy_files[0]}")
        break

Found 900 pattern files in: /user_data/csimmon2/long_pt/analyses/rsa_corrected/patterns
Example: UD_ses-05_run-2_house_pair-1_Scramble.npy


In [11]:
# CELL 3: Category Decoding Function
def decode_categories(subject_id, n_folds=30, test_size=0.2):
    """
    Classify each category vs. others using SVM
    Combines patterns across all 3 run pairs
    """
    code = SUBJECTS[subject_id]['code']
    sessions = SUBJECTS[subject_id]['sessions']
    stim_conditions = ['Face', 'House', 'Object', 'Word']
    pair_to_leftout_run = {0: 3, 1: 2, 2: 1}
    
    # Get all ROI categories from pattern files
    roi_categories = []
    for f in PATTERN_DIR.glob(f'{code}_*.npy'):
        parts = f.stem.split('_')
        roi = parts[3]  # e.g., 'face', 'word', etc.
        if roi not in roi_categories:
            roi_categories.append(roi)
    
    print(f"\n{code}: Decoding in {len(roi_categories)} ROIs across {len(sessions)} sessions")
    
    results = []
    
    for roi_category in roi_categories:
        for session in sessions:
            print(f"  {roi_category.upper()} ROI, ses-{session}:")
            
            # Load patterns from all 3 pairs
            all_patterns = {cond: [] for cond in stim_conditions}
            
            for pair_idx in range(3):
                run_idx = pair_to_leftout_run[pair_idx]
                
                for stim_cond in stim_conditions:
                    pattern_file = PATTERN_DIR / f'{code}_ses-{session}_run-{run_idx}_{roi_category}_pair-{pair_idx}_{stim_cond}.npy'
                    
                    if pattern_file.exists():
                        blocks = np.load(pattern_file)
                        for block in blocks:
                            all_patterns[stim_cond].append(block)
            
            # Decode each category vs. others
            for target in stim_conditions:
                target_key = target
                
                # Get target and distractor patterns
                X_target = np.array(all_patterns[target_key])
                X_others = []
                for cond in stim_conditions:
                    if cond != target:
                        X_others.extend(all_patterns[cond])
                X_others = np.array(X_others)
                
                if len(X_target) < 3 or len(X_others) < 3:
                    print(f"    {target}: insufficient data")
                    continue
                
                # Combine and create labels
                X = np.vstack([X_target, X_others])
                y = np.array([1]*len(X_target) + [0]*len(X_others))
                
                # Remove NaN columns
                valid_mask = ~np.isnan(X).any(axis=0)
                X_clean = X[:, valid_mask]
                
                if X_clean.shape[1] < 10:
                    print(f"    {target}: too few voxels")
                    continue
                
                # Stratified shuffle split
                sss = StratifiedShuffleSplit(n_splits=n_folds, test_size=test_size, random_state=42)
                
                accuracies = []
                clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
                
                for train_idx, test_idx in sss.split(X_clean, y):
                    X_train, X_test = X_clean[train_idx], X_clean[test_idx]
                    y_train, y_test = y[train_idx], y[test_idx]
                    
                    clf.fit(X_train, y_train)
                    accuracies.append(clf.score(X_test, y_test))
                
                acc_mean = np.mean(accuracies)
                acc_std = np.std(accuracies)
                
                results.append({
                    'subject': code,
                    'roi': roi_category,
                    'session': session,
                    'target_category': target.lower(),
                    'accuracy': acc_mean,
                    'se': acc_std,
                    'n_voxels': X_clean.shape[1],
                    'n_target': len(X_target),
                    'n_others': len(X_others)
                })
                
                print(f"    {target}: {acc_mean:.3f} ± {acc_std:.3f} ({X_clean.shape[1]} voxels)")
    
    return pd.DataFrame(results)

# Run decoding
ud_decoding = decode_categories('sub-004', n_folds=30, test_size=0.2)
tc_decoding = decode_categories('sub-021', n_folds=30, test_size=0.2)

# Save results
# At the end of Cell 3, change save lines:
ud_decoding.to_csv(RESULTS_DIR / 'ud_decoding.csv', index=False)
tc_decoding.to_csv(RESULTS_DIR / 'tc_decoding.csv', index=False)

print(f"\n✓ Decoding complete - saved to {RESULTS_DIR}")
print(f"\n✓ Decoding complete")
print(f"UD: {len(ud_decoding)} results")
print(f"TC: {len(tc_decoding)} results")


UD: Decoding in 0 ROIs across 5 sessions

TC: Decoding in 0 ROIs across 3 sessions

✓ Decoding complete - saved to /user_data/csimmon2/git_repos/long_pt/results/decoding

✓ Decoding complete
UD: 0 results
TC: 0 results
