# Simple Machine Learning Testing

We want to see if we can train something simple like a Random Forest to predict if a chunk is high or low anxiety. 

In [None]:
from sklearn.model_selection import train_test_split
from helper_functions import *

# load the events from JSON 
with open("subject_movie_events.json", "r") as f:
    event_dict = json.load(f)

# List of fMRI files and corresponding task names (or event keys)
fmri_files = [
    "subset_fmri/sub-S01_ses-1_task-BigBuckBunny_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S01_ses-1_task-FirstBite_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S01_ses-1_task-YouAgain_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S01_ses-2_task-AfterTheRain_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S01_ses-2_task-LessonLearned_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S17_ses-3_task-BetweenViewings_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S17_ses-3_task-FirstBite_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S17_ses-3_task-Superhero_space-MNI_desc-ppres_bold.nii.gz"
]

# List of task names corresponding to each fMRI file
tasks = [
    "BigBuckBunny",
    "FirstBite",
    "YouAgain",
    "AfterTheRain",
    "LessonLearned",
    "BetweenViewings",
    "Superhero"
]

# Store low and high anxiety chunks for all subjects
all_low_anx_chunks = []
all_high_anx_chunks = []

# Loop through each subject/task pair
for fmri_file, task_name in zip(fmri_files, tasks):
    subject_id = fmri_file.split("/")[1].split("_")[0].replace("sub-", "")
    event_key = f"{subject_id}::{task_name}"

    # Check if the event key exists in the event dictionary
    if event_key in event_dict:
        subject_events = event_dict[event_key]
        print(f"Events found for subject {subject_id} with task {task_name}.")
        
        # Process the fMRI data and extract anxiety chunks
        extracted_chunks, low_anx_chunks, high_anx_chunks = process_fmri_data(fmri_file, subject_events)
        all_low_anx_chunks.extend(low_anx_chunks)
        all_high_anx_chunks.extend(high_anx_chunks)


Events found for subject S01 with task BigBuckBunny.
fMRI data loaded. Data shape: (91, 109, 91, 528)
Event 1: Volumes 12–16 → chunk shape: (91, 109, 91, 4) | Anxiety = 70.5
Event 2: Volumes 34–39 → chunk shape: (91, 109, 91, 5) | Anxiety = 17.0
Event 3: Volumes 61–66 → chunk shape: (91, 109, 91, 5) | Anxiety = 92.5
Event 4: Volumes 84–90 → chunk shape: (91, 109, 91, 6) | Anxiety = 65.0
Event 5: Volumes 107–112 → chunk shape: (91, 109, 91, 5) | Anxiety = 75.5
Event 6: Volumes 133–138 → chunk shape: (91, 109, 91, 5) | Anxiety = 95.5
Event 7: Volumes 148–153 → chunk shape: (91, 109, 91, 5) | Anxiety = 100.0
Event 8: Volumes 173–176 → chunk shape: (91, 109, 91, 3) | Anxiety = 100.0
Event 9: Volumes 186–190 → chunk shape: (91, 109, 91, 4) | Anxiety = 51.0
Event 10: Volumes 192–198 → chunk shape: (91, 109, 91, 6) | Anxiety = 64.5
Event 11: Volumes 207–210 → chunk shape: (91, 109, 91, 3) | Anxiety = 9.5
Event 12: Volumes 238–243 → chunk shape: (91, 109, 91, 5) | Anxiety = 79.5
Event 13: Volu

In [196]:
print(len(all_low_anx_chunks))
print(len(all_high_anx_chunks))

63
37


In [82]:
tr =  1.2999999523162842
atlas = datasets.fetch_atlas_schaefer_2018(n_rois=100, yeo_networks=7)
labels = atlas["labels"]
masker = NiftiLabelsMasker(labels_img=atlas["maps"], standardize=True, t_r=tr)

def compute_fc(chunk_4d, tr, affine, masker):
    chunk_img = nib.Nifti1Image(chunk_4d, affine)
    roi_ts = masker.fit_transform(chunk_img)
    fc_matrix = ConnectivityMeasure(kind="correlation").fit_transform([roi_ts])[0]
    return fc_matrix

[get_dataset_dir] Dataset found in /Users/emmasombers/nilearn_data/schaefer_2018


In [87]:
from helper_functions import *

X = []  # Features (flattened FC)
y = []  # Labels (0 = low anxiety, 1 = high anxiety)

# Process the low anxiety chunks
for chunk in all_low_anx_chunks:
    print(f"Chunk data shape: {chunk['chunk'].shape}, affine: {chunk['affine']}")
    if chunk['chunk'].shape[3] == 0:  # Skip empty chunks
        print(f"Skipping empty chunk for anxiety {chunk['anxiety']}")
        continue  # Skip this chunk and move to the next one
    fc = compute_fc(chunk["chunk"], tr, chunk["affine"], masker)
    X.append(fc.flatten())  # Flatten FC to a 1D feature vector
    y.append(0)  # Label for low anxiety

# Process the high anxiety chunks
for chunk in all_high_anx_chunks:
    print(f"Chunk data shape: {chunk['chunk'].shape}, affine: {chunk['affine']}")
    if chunk['chunk'].shape[3] == 0:  # Skip empty chunks
        print(f"Skipping empty chunk for anxiety {chunk['anxiety']}")
        continue  # Skip this chunk and move to the next one
    fc = compute_fc(chunk["chunk"], tr, chunk["affine"], masker)
    X.append(fc.flatten())  # Flatten FC to a 1D feature vector
    y.append(1)  # Label for high anxiety

Chunk data shape: (91, 109, 91, 5), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 3), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 5), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 4), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 5), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 4), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 8), affine: [[  -2.    0.    0.   90.]
 [   0.    

In [88]:
print(len(X))
print(y)

99
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [93]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC  # Support Vector Machine Classifier
from sklearn.metrics import classification_report

# Convert lists to numpy arrays
X = np.array(X)
y = np.array(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print(f"Size of training data: {X_train.shape[0]} samples")
print(f"Size of testing data: {X_test.shape[0]} samples")

# Initialize and train an SVM classifier
clf = SVC(random_state=42)
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))

Size of training data: 79 samples
Size of testing data: 20 samples
              precision    recall  f1-score   support

           0       0.90      0.64      0.75        14
           1       0.50      0.83      0.62         6

    accuracy                           0.70        20
   macro avg       0.70      0.74      0.69        20
weighted avg       0.78      0.70      0.71        20



remember precision means true positives/(true positives+ false positives) so what percent of the time the classifier was correct when predicting a positive 

recall means true positives/(true positives+false negatives) so what percent of actual class samples were correctly predicted as such 

# Group by networks instead of the larger FC situation 

In [115]:
# Compute FC by using the network groupings
def compute_grouped_fc(chunk_4d, tr, affine, network_groupings, masker):
    chunk_img = nib.Nifti1Image(chunk_4d, affine)
    roi_ts = masker.fit_transform(chunk_img)
    
    # Create a new FC matrix based on network groupings
    fc_matrix_grouped = np.zeros((len(network_groupings), len(network_groupings)))
    
    for i, (network_1, regions_1) in enumerate(network_groupings.items()):
        for j, (network_2, regions_2) in enumerate(network_groupings.items()):
            # Compute the mean time series for each network
            ts_1 = np.mean(roi_ts[:, regions_1], axis=1) if regions_1 else np.zeros(roi_ts.shape[0])
            ts_2 = np.mean(roi_ts[:, regions_2], axis=1) if regions_2 else np.zeros(roi_ts.shape[0])
            
            # Compute correlation between the two networks' time series
            fc_matrix_grouped[i, j] = np.corrcoef(ts_1, ts_2)[0, 1]
    
    return fc_matrix_grouped

def upper_triangle_mask(fc_matrix):
    # Create an upper triangle mask (excluding the diagonal)
    mask = np.triu(np.ones(fc_matrix.shape), k=1)
    return np.multiply(fc_matrix, mask)  # Apply the mask to the matrix


In [97]:
network_map = {
    "Visual": [b'7Networks_LH_Vis_1', b'7Networks_LH_Vis_2', b'7Networks_LH_Vis_3', b'7Networks_LH_Vis_4', 
               b'7Networks_LH_Vis_5', b'7Networks_LH_Vis_6', b'7Networks_LH_Vis_7', b'7Networks_LH_Vis_8',
               b'7Networks_LH_Vis_9', b'7Networks_RH_Vis_1', b'7Networks_RH_Vis_2', b'7Networks_RH_Vis_3',
               b'7Networks_RH_Vis_4', b'7Networks_RH_Vis_5', b'7Networks_RH_Vis_6', b'7Networks_RH_Vis_7',
               b'7Networks_RH_Vis_8'],
    "Somatomotor": [b'7Networks_LH_SomMot_1', b'7Networks_LH_SomMot_2', b'7Networks_LH_SomMot_3', 
                    b'7Networks_LH_SomMot_4', b'7Networks_LH_SomMot_5', b'7Networks_LH_SomMot_6', 
                    b'7Networks_RH_SomMot_1', b'7Networks_RH_SomMot_2', b'7Networks_RH_SomMot_3',
                    b'7Networks_RH_SomMot_4', b'7Networks_RH_SomMot_5', b'7Networks_RH_SomMot_6'],
    "Dorsal Attention": [b'7Networks_LH_DorsAttn_Post_1', b'7Networks_LH_DorsAttn_Post_2', 
                         b'7Networks_LH_DorsAttn_Post_3', b'7Networks_LH_DorsAttn_Post_4', 
                         b'7Networks_LH_DorsAttn_Post_5', b'7Networks_LH_DorsAttn_Post_6',
                         b'7Networks_LH_DorsAttn_PrCv_1', b'7Networks_RH_DorsAttn_Post_1', 
                         b'7Networks_RH_DorsAttn_Post_2', b'7Networks_RH_DorsAttn_Post_3',
                         b'7Networks_RH_DorsAttn_Post_4', b'7Networks_RH_DorsAttn_Post_5',
                         b'7Networks_RH_DorsAttn_PrCv_1'],
    "Salience and Ventral Attention": [b'7Networks_LH_SalVentAttn_ParOper_1', 
                                       b'7Networks_LH_SalVentAttn_FrOperIns_1', 
                                       b'7Networks_RH_SalVentAttn_TempOccPar_1', 
                                       b'7Networks_RH_SalVentAttn_TempOccPar_2', 
                                       b'7Networks_RH_SalVentAttn_FrOperIns_1'],
    "Limbic": [b'7Networks_LH_Limbic_OFC_1', b'7Networks_LH_Limbic_TempPole_1', 
               b'7Networks_LH_Limbic_TempPole_2', b'7Networks_RH_Limbic_OFC_1', 
               b'7Networks_RH_Limbic_TempPole_1'],
    "Control": [b'7Networks_LH_Cont_Par_1', b'7Networks_LH_Cont_PFCl_1', 
                b'7Networks_LH_Cont_pCun_1', b'7Networks_LH_Cont_Cing_1', 
                b'7Networks_RH_Cont_Par_1', b'7Networks_RH_Cont_Par_2', 
                b'7Networks_RH_Cont_PFCl_1', b'7Networks_RH_Cont_PFCl_2', 
                b'7Networks_RH_Cont_PFCl_3', b'7Networks_RH_Cont_PFCl_4'],
    "Default Mode": [b'7Networks_LH_Default_Temp_1', b'7Networks_LH_Default_Temp_2', 
                     b'7Networks_LH_Default_Par_1', b'7Networks_LH_Default_Par_2', 
                     b'7Networks_LH_Default_PFC_1', b'7Networks_LH_Default_PFC_2', 
                     b'7Networks_LH_Default_PFC_3', b'7Networks_LH_Default_PFC_4', 
                     b'7Networks_LH_Default_PFC_5', b'7Networks_LH_Default_PFC_6', 
                     b'7Networks_LH_Default_PFC_7', b'7Networks_LH_Default_pCunPCC_1',
                     b'7Networks_LH_Default_pCunPCC_2', b'7Networks_RH_Default_Par_1', 
                     b'7Networks_RH_Default_Temp_1', b'7Networks_RH_Default_Temp_2', 
                     b'7Networks_RH_Default_Temp_3', b'7Networks_RH_Default_PFCv_1', 
                     b'7Networks_RH_Default_PFCv_2', b'7Networks_RH_Default_PFCdPFCm_1',
                     b'7Networks_RH_Default_PFCdPFCm_2', b'7Networks_RH_Default_PFCdPFCm_3', 
                     b'7Networks_RH_Default_pCunPCC_1', b'7Networks_RH_Default_pCunPCC_2']
}

# Map regions to their respective networks
grouped_regions = {network: [] for network in network_map}

for idx, label in enumerate(labels):
    for network, regions in network_map.items():
        if label in regions:
            grouped_regions[network].append(idx)

# Print the grouped regions to verify
for network, regions in grouped_regions.items():
    print(f"{network}: {regions}")

Visual: [0, 1, 2, 3, 4, 5, 6, 7, 8, 50, 51, 52, 53, 54, 55, 56, 57]
Somatomotor: [9, 10, 11, 12, 13, 14, 58, 59, 60, 61, 62, 63]
Dorsal Attention: [15, 16, 17, 18, 19, 20, 21, 66, 67, 68, 69, 70, 71]
Salience and Ventral Attention: [23, 24, 73, 74, 75]
Limbic: [30, 31, 32, 78, 79]
Control: [33, 34, 35, 36, 80, 81, 82, 83, 84, 85]
Default Mode: [37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]


In [116]:
X_grouped = []  # Features (flattened FC)
y_grouped = []  # Labels (0 = low anxiety, 1 = high anxiety)

atlas = datasets.fetch_atlas_schaefer_2018(n_rois=100, yeo_networks=7)
labels = atlas["labels"]
masker = NiftiLabelsMasker(labels_img=atlas["maps"], standardize=True, t_r=tr)

# Process the low anxiety chunks
for chunk in all_low_anx_chunks:
    print(f"Chunk data shape: {chunk['chunk'].shape}, affine: {chunk['affine']}")
    if chunk['chunk'].shape[3] == 0:  # Skip empty chunks
        print(f"Skipping empty chunk for anxiety {chunk['anxiety']}")
        continue  # Skip this chunk and move to the next one
    fc = compute_grouped_fc(chunk["chunk"], tr, chunk["affine"], grouped_regions, masker)
    fc_upper_triangle = upper_triangle_mask(fc)
    X_grouped.append(fc.flatten())  # Flatten FC to a 1D feature vector
    y_grouped.append(0)  # Label for low anxiety

# Process the high anxiety chunks
for chunk in all_high_anx_chunks:
    print(f"Chunk data shape: {chunk['chunk'].shape}, affine: {chunk['affine']}")
    if chunk['chunk'].shape[3] == 0:  # Skip empty chunks
        print(f"Skipping empty chunk for anxiety {chunk['anxiety']}")
        continue  # Skip this chunk and move to the next one
    fc = compute_grouped_fc(chunk["chunk"], tr, chunk["affine"], grouped_regions, masker)
    fc_upper_triangle = upper_triangle_mask(fc)
    X_grouped.append(fc.flatten())  # Flatten FC to a 1D feature vector
    y_grouped.append(1)  # Label for high anxiety

[get_dataset_dir] Dataset found in /Users/emmasombers/nilearn_data/schaefer_2018
Chunk data shape: (91, 109, 91, 5), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 3), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 5), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 4), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 5), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 4), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
C

In [117]:
print(len(X))
print(y)

99
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [183]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC  # Support Vector Machine Classifier
from sklearn.metrics import classification_report, roc_auc_score

# Convert lists to numpy arrays
X_grouped = np.array(X_grouped)
y_grouped = np.array(y_grouped)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_grouped, y_grouped, test_size=0.2)
print(f"Size of training data: {X_train.shape[0]} samples")
print(f"Size of testing data: {X_test.shape[0]} samples")

# Initialize and train an SVM classifier
clf = SVC(random_state=42)
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))

roc_auc = roc_auc_score(y_test, y_pred)
print(f"ROC AUC score: {roc_auc:.2f}")

Size of training data: 79 samples
Size of testing data: 20 samples
              precision    recall  f1-score   support

           0       0.78      1.00      0.88        14
           1       1.00      0.33      0.50         6

    accuracy                           0.80        20
   macro avg       0.89      0.67      0.69        20
weighted avg       0.84      0.80      0.76        20

ROC AUC score: 0.67


In [None]:
# add roc auc value 
# get the top half only instead of the whole FC matrix 
# make sure subjects aren't in both training and test 

In [203]:
from sklearn.model_selection import train_test_split

# load the events from JSON 
with open("subject_movie_events.json", "r") as f:
    event_dict = json.load(f)

# List of fMRI files and corresponding task names (or event keys)
fmri_files = [
    "subset_fmri/sub-S01_ses-1_task-BigBuckBunny_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S01_ses-1_task-FirstBite_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S01_ses-1_task-YouAgain_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S01_ses-2_task-AfterTheRain_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S01_ses-2_task-LessonLearned_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S17_ses-3_task-BetweenViewings_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S17_ses-3_task-FirstBite_space-MNI_desc-ppres_bold.nii.gz",
    "subset_fmri/sub-S17_ses-3_task-Superhero_space-MNI_desc-ppres_bold.nii.gz"
]

# List of task names corresponding to each fMRI file
tasks = [
    "BigBuckBunny",
    "FirstBite",
    "YouAgain",
    "AfterTheRain",
    "LessonLearned",
    "BetweenViewings",
    "Superhero"
]

# Store low and high anxiety chunks for all subjects
all_low_anx_chunks = []
all_high_anx_chunks = []
test_low_anx_chunks = []
test_high_anx_chunks = []

# List of subject IDs (assuming they correspond with each fMRI file)
subject_ids = ['S01', 'S01', 'S01', 'S01', 'S01', 'S17', 'S17', 'S17']

# Split the subject_ids into training and testing sets
train_subjects = ['S01']
test_subjects = ['S17']

# Loop through each subject/task pair
for fmri_file, task_name, subject_id in zip(fmri_files, tasks, subject_ids):
    event_key = f"{subject_id}::{task_name}"

    # Check if the event key exists in the event dictionary
    if event_key in event_dict:
        subject_events = event_dict[event_key]
        print(f"Events found for subject {subject_id} with task {task_name}.")
        
        # Process the fMRI data and extract anxiety chunks
        extracted_chunks, low_anx_chunks, high_anx_chunks = process_fmri_data(fmri_file, subject_events)
        
        # Only add chunks for the corresponding subjects
        if subject_id in train_subjects:
            all_low_anx_chunks.extend(low_anx_chunks)
            all_high_anx_chunks.extend(high_anx_chunks)
        elif subject_id in test_subjects:
            # This ensures no overlap between training and testing data
            test_low_anx_chunks.extend(low_anx_chunks)
            test_high_anx_chunks.extend(high_anx_chunks)
    else:
        print(f"No events found for subject {subject_id} with task {task_name}")

# Now you can train your model on the all_low_anx_chunks and all_high_anx_chunks and test it on the test set

Events found for subject S01 with task BigBuckBunny.
fMRI data loaded. Data shape: (91, 109, 91, 528)
Event 1: Volumes 12–16 → chunk shape: (91, 109, 91, 4) | Anxiety = 70.5
Event 2: Volumes 34–39 → chunk shape: (91, 109, 91, 5) | Anxiety = 17.0
Event 3: Volumes 61–66 → chunk shape: (91, 109, 91, 5) | Anxiety = 92.5
Event 4: Volumes 84–90 → chunk shape: (91, 109, 91, 6) | Anxiety = 65.0
Event 5: Volumes 107–112 → chunk shape: (91, 109, 91, 5) | Anxiety = 75.5
Event 6: Volumes 133–138 → chunk shape: (91, 109, 91, 5) | Anxiety = 95.5
Event 7: Volumes 148–153 → chunk shape: (91, 109, 91, 5) | Anxiety = 100.0
Event 8: Volumes 173–176 → chunk shape: (91, 109, 91, 3) | Anxiety = 100.0
Event 9: Volumes 186–190 → chunk shape: (91, 109, 91, 4) | Anxiety = 51.0
Event 10: Volumes 192–198 → chunk shape: (91, 109, 91, 6) | Anxiety = 64.5
Event 11: Volumes 207–210 → chunk shape: (91, 109, 91, 3) | Anxiety = 9.5
Event 12: Volumes 238–243 → chunk shape: (91, 109, 91, 5) | Anxiety = 79.5
Event 13: Volu

In [204]:
print(test_high_anx_chunks)

[{'chunk': array([[[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         ...,
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         ...,
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         ...,
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        ...,

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         ...,
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         ...,
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
    

In [205]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
import numpy as np

# Features (flattened FC) and labels for training and testing
X_train = []
y_train = []
X_test = []
y_test = []

# Extract features for low and high anxiety chunks for train and test sets
for chunk in all_low_anx_chunks:
    print(f"Chunk data shape: {chunk['chunk'].shape}, affine: {chunk['affine']}")
    if chunk['chunk'].shape[3] == 0:  # Skip empty chunks
        print(f"Skipping empty chunk for anxiety {chunk['anxiety']}")
        continue  # Skip this chunk and move to the next one
    fc = compute_grouped_fc(chunk["chunk"], tr, chunk["affine"], grouped_regions, masker)
    X_train.append(fc.flatten())  # Flatten FC to a 1D feature vector
    y_train.append(0)  # Label for low anxiety

for chunk in all_high_anx_chunks:
    print(f"Chunk data shape: {chunk['chunk'].shape}, affine: {chunk['affine']}")
    if chunk['chunk'].shape[3] == 0:  # Skip empty chunks
        print(f"Skipping empty chunk for anxiety {chunk['anxiety']}")
        continue  # Skip this chunk and move to the next one
    fc = compute_grouped_fc(chunk["chunk"], tr, chunk["affine"], grouped_regions, masker)
    X_train.append(fc.flatten())  # Flatten FC to a 1D feature vector
    y_train.append(1)  # Label for high anxiety

for chunk in test_low_anx_chunks:
    print(f"Chunk data shape: {chunk['chunk'].shape}, affine: {chunk['affine']}")
    if chunk['chunk'].shape[3] == 0:  # Skip empty chunks
        print(f"Skipping empty chunk for anxiety {chunk['anxiety']}")
        continue  # Skip this chunk and move to the next one
    fc = compute_grouped_fc(chunk["chunk"], tr, chunk["affine"], grouped_regions, masker)
    X_test.append(fc.flatten())  # Flatten FC to a 1D feature vector
    y_test.append(0)  # Label for low anxiety

for chunk in test_high_anx_chunks:
    print(f"Chunk data shape: {chunk['chunk'].shape}, affine: {chunk['affine']}")
    if chunk['chunk'].shape[3] == 0:  # Skip empty chunks
        print(f"Skipping empty chunk for anxiety {chunk['anxiety']}")
        continue  # Skip this chunk and move to the next one
    fc = compute_grouped_fc(chunk["chunk"], tr, chunk["affine"], grouped_regions, masker)
    X_test.append(fc.flatten())  # Flatten FC to a 1D feature vector
    y_test.append(1)  # Label for high anxiety

Chunk data shape: (91, 109, 91, 5), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 3), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 5), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 4), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 5), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 4), affine: [[  -2.    0.    0.   90.]
 [   0.    2.    0. -126.]
 [   0.    0.    2.  -72.]
 [   0.    0.    0.    1.]]
Chunk data shape: (91, 109, 91, 8), affine: [[  -2.    0.    0.   90.]
 [   0.    

In [206]:
print(X_test)

[array([ 1.        ,  0.82222758,  0.34937126,  0.40541543,  0.33284095,
        0.02260258,  0.64095494,  0.82222758,  1.        ,  0.67228741,
        0.28659994,  0.59212161,  0.47414361,  0.74936251,  0.34937126,
        0.67228741,  1.        , -0.33276981,  0.5930572 ,  0.90488121,
        0.40983903,  0.40541543,  0.28659994, -0.33276981,  1.        ,
        0.03029761, -0.51267034,  0.10190366,  0.33284095,  0.59212161,
        0.5930572 ,  0.03029761,  1.        ,  0.66248706,  0.8129168 ,
        0.02260258,  0.47414361,  0.90488121, -0.51267034,  0.66248706,
        1.        ,  0.41406687,  0.64095494,  0.74936251,  0.40983903,
        0.10190366,  0.8129168 ,  0.41406687,  1.        ]), array([ 1.        , -0.88648072,  0.59613903, -0.29154576, -0.71232621,
       -0.86195363, -0.32433135, -0.88648072,  1.        , -0.18640911,
        0.39120982,  0.46451235,  0.990503  ,  0.70234054,  0.59613903,
       -0.18640911,  1.        ,  0.32430074, -0.88002858, -0.11053238,
  

In [219]:
# Convert lists to numpy arrays
X_train = np.array(X_train).reshape(-1, X_train[0].shape[0])  # Reshapes to 2D if necessary
y_train = np.array(y_train)
X_test = np.array(X_test).reshape(-1, X_test[0].shape[0])  # Reshapes to 2D if necessary
y_test = np.array(y_test)

# Train a classifier (SVM in this case)
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Print the classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Calculate ROC AUC score
roc_auc = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
print(f"ROC AUC Score: {roc_auc:.3f}")

Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.23      0.36        35
           1       0.10      0.60      0.17         5

    accuracy                           0.28        40
   macro avg       0.45      0.41      0.26        40
weighted avg       0.71      0.28      0.33        40



AttributeError: This 'SVC' has no attribute 'predict_proba'

mixed effects linear model 