In [11]:
%matplotlib inline

Within-subject SVM classification based on beta weights (one map per trial) averaged within networks from different grains of the MIST parcellation, for the CIMAQ memory encoding task (fMRI data).
Mean network betas reflect the engagement of a particular network for each trial.
MIST Parcellations include: 7, 12, 20, 36, 64, 122, 197, 325, 444 networks

Trials (conditions) are classified according to:
- task condition (encoding or control task)
- memory performance (hit vs miss, correct vs incorrect source)
- stimulus category (?)

Each model is trained and tested on data from the same subject, and then group statistics (confidence intervals) are computed around the accuracy scores of the individual participants.

In [12]:
import os
import sys
import glob
import numpy as np
import pandas as pd
import nilearn
import scipy
import nibabel as nb
import sklearn
import seaborn as sns
import itertools

from numpy import nan as NaN
from matplotlib import pyplot as plt
from nilearn import image, plotting
from nilearn import masking
from nilearn import plotting
from nilearn import datasets
from nilearn.plotting import plot_stat_map, plot_roi, plot_anat, plot_img, show
from nilearn.input_data import NiftiMasker, NiftiLabelsMasker
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, f1_score
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.preprocessing import MinMaxScaler


Step 1: import list of participants, and generate sublists of participants who have enough trials per category for each classification.

Encoding vs Control task conditions (all 94)
Stimulus category (all 94)
Hit versus Miss (42 participants; at least 15 trials per condition)
Correct Source versus Wrong Source (49 participants; at least 15 trials per condition)
Correct Source versus Miss (38 participants; at least 15 trials per condition)
*NOTE: ADD filter to exclude participants with too many scrubbed frames?*

In [13]:

# Participant table: tab-separated file, one row per participant
data_file = '/Users/mombot/Documents/Simexp/CIMAQ/Data/Participants/Splitting/Sub_list.tsv'
sub_data = pd.read_csv(data_file, sep='\t')

# Drop every participant whose QC_status is 'F' (failed quality control)
passed_qc = sub_data['QC_status'] != 'F'
sub_data = sub_data.loc[passed_qc]

## ADD filter to exclude participants with too many scrubbed frames?? ##

# Inclusion threshold: a participant enters a contrast only with MORE than
# `num` trials (i.e. at least num + 1) in each of its two conditions
num = 14

# Encoding vs Control, and Stimulus Category classifications:
# every participant who passed QC
all_subs, all_diagnosis = sub_data['participant_id'], sub_data['cognitive_status']
print(all_subs)
print(len(all_subs))

# Hit versus Miss: enough hit trials AND enough miss trials
hm_keep = (sub_data['hits'] > num) & (sub_data['miss'] > num)
hm_data = sub_data.loc[hm_keep]
hm_subs, hm_diagnosis = hm_data['participant_id'], hm_data['cognitive_status']
print(hm_subs)
print(len(hm_subs))

# Correct Source versus Wrong Source: enough trials of both source outcomes
cw_keep = (sub_data['correct_source'] > num) & (sub_data['wrong_source'] > num)
cw_data = sub_data.loc[cw_keep]
cw_subs, cw_diagnosis = cw_data['participant_id'], cw_data['cognitive_status']
print(cw_subs)
print(len(cw_subs))

# Correct Source versus Miss: enough correct-source trials AND enough miss trials
cmiss_keep = (sub_data['correct_source'] > num) & (sub_data['miss'] > num)
cmiss_data = sub_data.loc[cmiss_keep]
cmiss_subs, cmiss_diagnosis = cmiss_data['participant_id'], cmiss_data['cognitive_status']
print(cmiss_subs)
print(len(cmiss_subs))


0      108391
1      120839
2      122922
3      127228
4      139593
6      147863
7      150649
8      164965
9      175295
10     178101
11     189005
12     197192
14     199801
15     219637
16     229301
17     247659
18     254402
19     255499
20     258618
21     258912
22     267168
23     270218
24     271596
27     314409
28     326073
29     336665
30     337021
31     350555
32     370092
34     385370
        ...  
70     763590
71     778749
72     783781
73     785217
74     785245
75     804743
77     845675
78     866812
79     878354
80     884343
81     886007
83     893978
85     901551
86     906145
87     914042
88     915022
89     920577
90     932933
91     936730
92     938001
93     955548
94     956049
95     956130
96     968913
97     974246
98     979001
99     983291
100    988602
101    996599
102    998166
Name: participant_id, Length: 94, dtype: int64
94
0      108391
2      122922
4      139593
8      164965
14     199801
17     247659
19     25549

Step 2. Set up paths of directories of interest

Create empty data structures to save and export classification results


In [14]:
# Root of the CIMAQ data tree; each directory of interest is a sub-path of it
data_root = '/Users/mombot/Documents/Simexp/CIMAQ/Data'

# inputs: per-trial beta maps, trial label files, subject functional masks
beta_dir = data_root + '/Nistats/Betas'
label_dir = data_root + '/Nistats/Events'
mask_dir = data_root + '/masks'
# output: tables of classification results
output_dir = data_root + '/Nilearn/Group_results'


Step 3. ENCODING VERSUS CONTROL TASK CLASSIFICATION

Build and test model for each participant on list, and compile data in a single pandas dataframe

In [15]:
# ENCODING VERSUS CONTROL TASK CLASSIFICATION
# One linear SVM per participant, trained and tested on that participant's own trials.

# number of cross-validation folds run within each participant's training set
n_folds = 10

# columns of the results table: participant id, accuracy of each CV fold,
# accuracy of the pooled CV predictions, accuracy on the held-out test set
result_columns = (['dccid']
                  + ['CV'+str(i+1)+'_acc' for i in range(n_folds)]
                  + ['TrainSet_MeanCV_acc', 'TestSet_acc'])

# One row of scores per participant. Rows are gathered in a list and turned into
# a DataFrame once, after the loop: DataFrame.append was removed in pandas 2.0,
# and growing a frame row by row copies it on every iteration.
enc_ctl_rows = []

for sub in all_subs:
    print(sub)
    s_data = [sub]
    # load subject's beta maps (one per trial)
    # NOTE(review): the wildcard match decides the order of the maps; it is assumed
    # to follow the row order of the labels file -- TODO confirm the file naming
    betas = image.load_img(img=os.path.join(beta_dir, str(sub), 'TrialContrasts/betas_sub'+str(sub)+'*.nii'),
                           wildcards=True)
    # initialize NiftiMasker object from the subject's functional mask
    sub_mask = nb.load(os.path.join(mask_dir, 'func_sub'+str(sub)+'_mask_stereonl.nii'))
    sub_masker = NiftiMasker(mask_img=sub_mask, standardize=True)

    # transform subject's beta maps into a trials x voxels array (whole-brain features)
    # NOTE(review): standardize=True is applied here, before the train/test split,
    # so the scaling uses all of the subject's trials -- confirm this is acceptable
    X_enc_ctl = sub_masker.fit_transform(betas)

    # load subject's trial labels
    labels_file = os.path.join(label_dir, 'sub-'+str(sub)+'_enco_ctl.tsv')
    enco_ctl_labels = pd.read_csv(labels_file, sep='\t')
    y_enco_ctl = enco_ctl_labels['condition']

    # every beta map needs exactly one label; fail early with a readable message
    if len(X_enc_ctl) != len(y_enco_ctl):
        raise ValueError('sub ' + str(sub) + ': ' + str(len(X_enc_ctl)) + ' beta maps but '
                         + str(len(y_enco_ctl)) + ' trial labels')

    # mask data to exclude trials of no interest
    # does not apply here

    # Split trials into a training and a test set
    # (no random_state: the split, and so the scores, differ from run to run)
    X_train, X_test, y_train, y_test = train_test_split(
        X_enc_ctl, # x
        y_enco_ctl, # y
        test_size = 0.4, # 60%/40% split
        shuffle = True, # shuffle dataset before splitting
        stratify = y_enco_ctl, # keep distribution of conditions consistent betw. train & test sets
        #random_state = 123  # if set number, same shuffle each time, otherwise randomization algo
        )
    print('training:', len(X_train), 'testing:', len(X_test))
    print(y_train.value_counts(), y_test.value_counts())

    # define the model
    sub_svc = SVC(kernel='linear', class_weight='balanced')

    # do cross-validation to evaluate model performance within n_folds folds of
    # the training set. An integer cv with a classifier yields stratified,
    # unshuffled folds, so the two calls below are scored on identical splits.
    # `groups` is not passed: it is only used by group-based splitters.
    # out-of-fold predictions
    y_pred = cross_val_predict(sub_svc, X_train, y_train, cv=n_folds)
    # accuracy of each fold
    cv_acc = cross_val_score(sub_svc, X_train, y_train, cv=n_folds)
    print(cv_acc)

    s_data.extend(cv_acc)

    # evaluate overall model performance on training data: accuracy of the pooled
    # out-of-fold predictions (may differ slightly from the mean of the fold
    # accuracies when folds have unequal sizes)
    overall_acc = accuracy_score(y_pred = y_pred, y_true = y_train)
    overall_cr = classification_report(y_pred = y_pred, y_true = y_train)
    print('Accuracy:',overall_acc)
    print(overall_cr)

    s_data.append(overall_acc)

    # Test model on unseen data from the test set
    sub_svc.fit(X_train, y_train)
    y_pred = sub_svc.predict(X_test) # predict the task condition of the held-out trials
    acc = sub_svc.score(X_test, y_test) # get accuracy

    cr = classification_report(y_pred=y_pred, y_true=y_test) # get prec., recall & f1
    # print results
    print('accuracy =', acc)
    print(cr)

    s_data.append(acc)

    # get map of coefficients
    # coef_ = sub_svc.coef_
    # print(coef_.shape)
    #Return voxel weights into a nifti image using the NiftiMasker
    # coef_img = sub_masker.inverse_transform(coef_)
    #Save .nii to file
    # coef_img.to_filename(os.path.join(output_dir, 'Coef_maps', 'SVC_coeff_enc_ctl_sub-'+str(sub)+'.nii'))

    enc_ctl_rows.append(s_data)

# build the results table in one go (raises if a row does not match the columns)
enc_ctl_data = pd.DataFrame(enc_ctl_rows, columns=result_columns)

# Add demographic / behavioural columns right after 'dccid'.
# Both frames carry a fresh 0..n-1 index, so insert() lines the rows up by
# position; that is valid because the loop visited participants in sub_data order.
demo_data = sub_data.reset_index(drop=False)
demo_columns = ['cognitive_status', 'total_scrubbed_frames', 'mean_FD', 'hits', 'miss',
                'correct_source', 'wrong_source', 'dprime', 'associative_memScore']
for position, column in enumerate(demo_columns, start=1):
    enc_ctl_data.insert(loc = position, column = column, value = demo_data[column], allow_duplicates=True)

enc_ctl_data.to_csv(os.path.join(output_dir, 'SVC_withinSub_enc_ctl_wholeBrain.tsv'),
    sep='\t', header=True, index=False)


108391
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.625      0.625      0.75       0.71428571 0.71428571 1.
 0.85714286 0.66666667 1.         0.83333333]
Accuracy: 0.7714285714285715
              precision    recall  f1-score   support

         CTL       0.68      0.57      0.62        23
         Enc       0.80      0.87      0.84        47

   micro avg       0.77      0.77      0.77        70
   macro avg       0.74      0.72      0.73        70
weighted avg       0.76      0.77      0.77        70

accuracy = 0.723404255319149
              precision    recall  f1-score   support

         CTL       0.67      0.38      0.48        16
         Enc       0.74      0.90      0.81        31

   micro avg       0.72      0.72      0.72        47
   macro avg       0.70      0.64      0.65        47
weighted avg       0.71      0.72      0.70        47

120839
training: 70 testing: 47
Enc    47
CTL    23

accuracy = 0.8085106382978723
              precision    recall  f1-score   support

         CTL       0.71      0.75      0.73        16
         Enc       0.87      0.84      0.85        31

   micro avg       0.81      0.81      0.81        47
   macro avg       0.79      0.79      0.79        47
weighted avg       0.81      0.81      0.81        47

178101
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.75       0.625      0.875      0.85714286 0.71428571 0.85714286
 1.         0.33333333 0.66666667 1.        ]
Accuracy: 0.7714285714285715
              precision    recall  f1-score   support

         CTL       0.67      0.61      0.64        23
         Enc       0.82      0.85      0.83        47

   micro avg       0.77      0.77      0.77        70
   macro avg       0.74      0.73      0.73        70
weighted avg       0.77      0.77      0.77        70

accuracy = 0.8085106382978723
            

accuracy = 0.6521739130434783
              precision    recall  f1-score   support

         CTL       0.50      0.56      0.53        16
         Enc       0.75      0.70      0.72        30

   micro avg       0.65      0.65      0.65        46
   macro avg       0.62      0.63      0.63        46
weighted avg       0.66      0.65      0.66        46

258618
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.875      0.875      0.875      1.         1.         0.71428571
 1.         0.83333333 1.         0.83333333]
Accuracy: 0.9
              precision    recall  f1-score   support

         CTL       0.86      0.83      0.84        23
         Enc       0.92      0.94      0.93        47

   micro avg       0.90      0.90      0.90        70
   macro avg       0.89      0.88      0.89        70
weighted avg       0.90      0.90      0.90        70

accuracy = 0.851063829787234
              precision    r

accuracy = 0.8085106382978723
              precision    recall  f1-score   support

         CTL       0.73      0.69      0.71        16
         Enc       0.84      0.87      0.86        31

   micro avg       0.81      0.81      0.81        47
   macro avg       0.79      0.78      0.78        47
weighted avg       0.81      0.81      0.81        47

350555
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.875      0.625      0.75       0.71428571 1.         0.85714286
 0.71428571 1.         0.83333333 0.83333333]
Accuracy: 0.8142857142857143
              precision    recall  f1-score   support

         CTL       0.78      0.61      0.68        23
         Enc       0.83      0.91      0.87        47

   micro avg       0.81      0.81      0.81        70
   macro avg       0.80      0.76      0.78        70
weighted avg       0.81      0.81      0.81        70

accuracy = 0.7872340425531915
            

accuracy = 0.7659574468085106
              precision    recall  f1-score   support

         CTL       0.86      0.38      0.52        16
         Enc       0.75      0.97      0.85        31

   micro avg       0.77      0.77      0.77        47
   macro avg       0.80      0.67      0.68        47
weighted avg       0.79      0.77      0.74        47

437101
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.75       0.75       0.75       0.85714286 0.85714286 0.85714286
 0.85714286 0.66666667 0.83333333 0.83333333]
Accuracy: 0.8
              precision    recall  f1-score   support

         CTL       0.76      0.57      0.65        23
         Enc       0.81      0.91      0.86        47

   micro avg       0.80      0.80      0.80        70
   macro avg       0.79      0.74      0.76        70
weighted avg       0.80      0.80      0.79        70

accuracy = 0.7659574468085106
              precision    

accuracy = 0.851063829787234
              precision    recall  f1-score   support

         CTL       0.85      0.69      0.76        16
         Enc       0.85      0.94      0.89        31

   micro avg       0.85      0.85      0.85        47
   macro avg       0.85      0.81      0.83        47
weighted avg       0.85      0.85      0.85        47

517070
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.75       0.75       0.75       0.71428571 1.         0.71428571
 1.         1.         0.83333333 0.33333333]
Accuracy: 0.7857142857142857
              precision    recall  f1-score   support

         CTL       0.79      0.48      0.59        23
         Enc       0.79      0.94      0.85        47

   micro avg       0.79      0.79      0.79        70
   macro avg       0.79      0.71      0.72        70
weighted avg       0.79      0.79      0.77        70

accuracy = 0.8936170212765957
             

accuracy = 0.8297872340425532
              precision    recall  f1-score   support

         CTL       0.83      0.62      0.71        16
         Enc       0.83      0.94      0.88        31

   micro avg       0.83      0.83      0.83        47
   macro avg       0.83      0.78      0.80        47
weighted avg       0.83      0.83      0.82        47

630120
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[1.         1.         0.75       0.57142857 0.71428571 0.85714286
 1.         1.         0.83333333 1.        ]
Accuracy: 0.8714285714285714
              precision    recall  f1-score   support

         CTL       0.82      0.78      0.80        23
         Enc       0.90      0.91      0.91        47

   micro avg       0.87      0.87      0.87        70
   macro avg       0.86      0.85      0.85        70
weighted avg       0.87      0.87      0.87        70

accuracy = 0.7021276595744681
            

accuracy = 0.8723404255319149
              precision    recall  f1-score   support

         CTL       0.86      0.75      0.80        16
         Enc       0.88      0.94      0.91        31

   micro avg       0.87      0.87      0.87        47
   macro avg       0.87      0.84      0.85        47
weighted avg       0.87      0.87      0.87        47

748676
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.875      0.875      0.625      0.85714286 0.85714286 0.85714286
 0.42857143 0.33333333 1.         0.66666667]
Accuracy: 0.7428571428571429
              precision    recall  f1-score   support

         CTL       0.63      0.52      0.57        23
         Enc       0.78      0.85      0.82        47

   micro avg       0.74      0.74      0.74        70
   macro avg       0.71      0.69      0.69        70
weighted avg       0.73      0.74      0.74        70

accuracy = 0.723404255319149
             

accuracy = 0.6808510638297872
              precision    recall  f1-score   support

         CTL       0.56      0.31      0.40        16
         Enc       0.71      0.87      0.78        31

   micro avg       0.68      0.68      0.68        47
   macro avg       0.63      0.59      0.59        47
weighted avg       0.66      0.68      0.65        47

878354
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.5        0.875      0.625      1.         0.85714286 1.
 0.71428571 0.66666667 0.83333333 0.83333333]
Accuracy: 0.7857142857142857
              precision    recall  f1-score   support

         CTL       0.67      0.70      0.68        23
         Enc       0.85      0.83      0.84        47

   micro avg       0.79      0.79      0.79        70
   macro avg       0.76      0.76      0.76        70
weighted avg       0.79      0.79      0.79        70

accuracy = 0.7446808510638298
              precis

accuracy = 0.7872340425531915
              precision    recall  f1-score   support

         CTL       0.71      0.62      0.67        16
         Enc       0.82      0.87      0.84        31

   micro avg       0.79      0.79      0.79        47
   macro avg       0.77      0.75      0.76        47
weighted avg       0.78      0.79      0.78        47

932933
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.875      0.75       0.875      1.         0.71428571 0.71428571
 0.85714286 1.         0.83333333 0.83333333]
Accuracy: 0.8428571428571429
              precision    recall  f1-score   support

         CTL       0.80      0.70      0.74        23
         Enc       0.86      0.91      0.89        47

   micro avg       0.84      0.84      0.84        70
   macro avg       0.83      0.81      0.82        70
weighted avg       0.84      0.84      0.84        70

accuracy = 0.8723404255319149
            

accuracy = 0.8085106382978723
              precision    recall  f1-score   support

         CTL       0.77      0.62      0.69        16
         Enc       0.82      0.90      0.86        31

   micro avg       0.81      0.81      0.81        47
   macro avg       0.80      0.76      0.78        47
weighted avg       0.81      0.81      0.80        47

983291
training: 70 testing: 47
Enc    47
CTL    23
Name: condition, dtype: int64 Enc    31
CTL    16
Name: condition, dtype: int64
[0.875      0.75       0.875      0.71428571 0.85714286 0.85714286
 1.         0.66666667 1.         0.66666667]
Accuracy: 0.8285714285714286
              precision    recall  f1-score   support

         CTL       0.76      0.70      0.73        23
         Enc       0.86      0.89      0.88        47

   micro avg       0.83      0.83      0.83        70
   macro avg       0.81      0.79      0.80        70
weighted avg       0.83      0.83      0.83        70

accuracy = 0.9361702127659575
            

In [16]:
# HIT VERSUS MISS TRIAL CLASSIFICATION
# One linear SVM per participant, trained and tested on that participant's own
# hit and missed trials (all other trials are discarded).

# number of cross-validation folds run within each participant's training set
n_folds = 7

# columns of the results table: participant id, accuracy of each CV fold,
# accuracy of the pooled CV predictions, accuracy on the held-out test set
result_columns = (['dccid']
                  + ['CV'+str(i+1)+'_acc' for i in range(n_folds)]
                  + ['TrainSet_MeanCV_acc', 'TestSet_acc'])

# One row of scores per participant. Rows are gathered in a list and turned into
# a DataFrame once, after the loop: DataFrame.append was removed in pandas 2.0,
# and growing a frame row by row copies it on every iteration.
hit_miss_rows = []

for sub in hm_subs:
    print(sub)
    s_data = [sub]
    # load subject's beta maps (one per trial)
    # NOTE(review): the wildcard match decides the order of the maps; it is assumed
    # to follow the row order of the labels file -- TODO confirm the file naming
    betas = image.load_img(img=os.path.join(beta_dir, str(sub), 'TrialContrasts/betas_sub'+str(sub)+'*.nii'),
                           wildcards=True)
    # initialize NiftiMasker object from the subject's functional mask
    sub_mask = nb.load(os.path.join(mask_dir, 'func_sub'+str(sub)+'_mask_stereonl.nii'))
    sub_masker = NiftiMasker(mask_img=sub_mask, standardize=True)
    # transform subject's beta maps into a trials x voxels array (whole-brain features)
    # NOTE(review): standardize=True is applied here, over all trials and before
    # the trial selection and train/test split -- confirm this is acceptable
    X_hit_miss_ctl = sub_masker.fit_transform(betas)

    # load subject's trial labels
    labels_file = os.path.join(label_dir, 'sub-'+str(sub)+'_ctl_miss_hit.tsv')
    y_hit_miss_ctl = pd.read_csv(labels_file, sep='\t')
    y_hit_miss_ctl_labels = y_hit_miss_ctl['ctl_miss_hit']

    # every beta map needs exactly one label; fail early with a readable message
    if len(X_hit_miss_ctl) != len(y_hit_miss_ctl_labels):
        raise ValueError('sub ' + str(sub) + ': ' + str(len(X_hit_miss_ctl)) + ' beta maps but '
                         + str(len(y_hit_miss_ctl_labels)) + ' trial labels')

    # mask X and y data to exclude trials of no interest
    hit_miss_mask = y_hit_miss_ctl_labels.isin(['hit', 'missed'])
    y_hit_miss = y_hit_miss_ctl_labels[hit_miss_mask]
    # index the numpy array with a plain boolean array rather than a pandas Series
    X_hit_miss  = X_hit_miss_ctl[hit_miss_mask.values]

    # Split trials into a training and a test set
    # (no random_state: the split, and so the scores, differ from run to run)
    X_train, X_test, y_train, y_test = train_test_split(
        X_hit_miss, # x
        y_hit_miss, # y
        test_size = 0.4, # 60%/40% split
        shuffle = True, # shuffle dataset before splitting
        stratify = y_hit_miss, # keep distribution of conditions consistent betw. train & test sets
        #random_state = 123  # if set number, same shuffle each time, otherwise randomization algo
        )
    print('training:', len(X_train), 'testing:', len(X_test))
    print(y_train.value_counts(), y_test.value_counts())

    # define the model
    sub_svc = SVC(kernel='linear', class_weight='balanced')

    # do cross-validation to evaluate model performance within n_folds folds of
    # the training set. An integer cv with a classifier yields stratified,
    # unshuffled folds, so the two calls below are scored on identical splits.
    # `groups` is not passed: it is only used by group-based splitters.
    # out-of-fold predictions
    y_pred = cross_val_predict(sub_svc, X_train, y_train, cv=n_folds)
    # accuracy of each fold
    cv_acc = cross_val_score(sub_svc, X_train, y_train, cv=n_folds)
    print(cv_acc)

    s_data.extend(cv_acc)

    # evaluate overall model performance on training data: accuracy of the pooled
    # out-of-fold predictions (may differ slightly from the mean of the fold
    # accuracies when folds have unequal sizes)
    overall_acc = accuracy_score(y_pred = y_pred, y_true = y_train)
    overall_cr = classification_report(y_pred = y_pred, y_true = y_train)
    print('Accuracy:',overall_acc)
    print(overall_cr)

    s_data.append(overall_acc)

    # Test model on unseen data from the test set
    sub_svc.fit(X_train, y_train)
    y_pred = sub_svc.predict(X_test) # predict hit vs missed for the held-out trials
    acc = sub_svc.score(X_test, y_test) # get accuracy

    cr = classification_report(y_pred=y_pred, y_true=y_test) # get prec., recall & f1
    # print results
    print('accuracy =', acc)
    print(cr)

    s_data.append(acc)

    # get map of coefficients
    # coef_ = sub_svc.coef_
    # print(coef_.shape)
    #Return voxel weights into a nifti image using the NiftiMasker
    # coef_img = sub_masker.inverse_transform(coef_)
    #Save .nii to file
    # coef_img.to_filename(os.path.join(output_dir, 'Coef_maps', 'SVC_coeff_hit_miss_sub-'+str(sub)+'.nii'))

    hit_miss_rows.append(s_data)

# build the results table in one go (raises if a row does not match the columns)
hit_miss_data = pd.DataFrame(hit_miss_rows, columns=result_columns)

# Add demographic / behavioural columns right after 'dccid'.
# Both frames carry a fresh 0..n-1 index, so insert() lines the rows up by
# position; that is valid because the loop visited participants in hm_data order.
demo_data = hm_data.reset_index(drop=False)
demo_columns = ['cognitive_status', 'total_scrubbed_frames', 'mean_FD', 'hits', 'miss',
                'correct_source', 'wrong_source', 'dprime', 'associative_memScore']
for position, column in enumerate(demo_columns, start=1):
    hit_miss_data.insert(loc = position, column = column, value = demo_data[column], allow_duplicates=True)

hit_miss_data.to_csv(os.path.join(output_dir, 'SVC_withinSub_hit_miss_wholeBrain.tsv'),
    sep='\t', header=True, index=False)


108391
training: 46 testing: 32
hit       37
missed     9
Name: ctl_miss_hit, dtype: int64 hit       26
missed     6
Name: ctl_miss_hit, dtype: int64
[0.625      0.375      0.5        0.83333333 0.66666667 0.66666667
 0.83333333]
Accuracy: 0.6304347826086957
              precision    recall  f1-score   support

         hit       0.76      0.78      0.77        37
      missed       0.00      0.00      0.00         9

   micro avg       0.63      0.63      0.63        46
   macro avg       0.38      0.39      0.39        46
weighted avg       0.61      0.63      0.62        46

accuracy = 0.71875
              precision    recall  f1-score   support

         hit       0.81      0.85      0.83        26
      missed       0.20      0.17      0.18         6

   micro avg       0.72      0.72      0.72        32
   macro avg       0.51      0.51      0.51        32
weighted avg       0.70      0.72      0.71        32

122922
training: 46 testing: 32
hit       32
missed    14
Name: ctl_

training: 44 testing: 30
hit       30
missed    14
Name: ctl_miss_hit, dtype: int64 hit       21
missed     9
Name: ctl_miss_hit, dtype: int64
[0.42857143 1.         0.66666667 0.83333333 0.5        0.83333333
 0.5       ]
Accuracy: 0.6818181818181818
              precision    recall  f1-score   support

         hit       0.74      0.83      0.78        30
      missed       0.50      0.36      0.42        14

   micro avg       0.68      0.68      0.68        44
   macro avg       0.62      0.60      0.60        44
weighted avg       0.66      0.68      0.67        44

accuracy = 0.5666666666666667
              precision    recall  f1-score   support

         hit       0.75      0.57      0.65        21
      missed       0.36      0.56      0.43         9

   micro avg       0.57      0.57      0.57        30
   macro avg       0.55      0.56      0.54        30
weighted avg       0.63      0.57      0.58        30

314409
training: 46 testing: 32
hit       27
missed    19
Name: 

training: 46 testing: 32
hit       35
missed    11
Name: ctl_miss_hit, dtype: int64 hit       25
missed     7
Name: ctl_miss_hit, dtype: int64
[0.85714286 1.         0.71428571 0.71428571 0.83333333 0.83333333
 0.66666667]
Accuracy: 0.8043478260869565
              precision    recall  f1-score   support

         hit       0.82      0.94      0.88        35
      missed       0.67      0.36      0.47        11

   micro avg       0.80      0.80      0.80        46
   macro avg       0.75      0.65      0.68        46
weighted avg       0.79      0.80      0.78        46

accuracy = 0.8125
              precision    recall  f1-score   support

         hit       0.81      1.00      0.89        25
      missed       1.00      0.14      0.25         7

   micro avg       0.81      0.81      0.81        32
   macro avg       0.90      0.57      0.57        32
weighted avg       0.85      0.81      0.75        32

458807
training: 46 testing: 32
hit       31
missed    15
Name: ctl_miss_hit

training: 46 testing: 32
hit       25
missed    21
Name: ctl_miss_hit, dtype: int64 hit       18
missed    14
Name: ctl_miss_hit, dtype: int64
[0.28571429 0.71428571 0.28571429 0.42857143 0.33333333 0.16666667
 0.16666667]
Accuracy: 0.34782608695652173
              precision    recall  f1-score   support

         hit       0.41      0.44      0.42        25
      missed       0.26      0.24      0.25        21

   micro avg       0.35      0.35      0.35        46
   macro avg       0.34      0.34      0.34        46
weighted avg       0.34      0.35      0.34        46

accuracy = 0.40625
              precision    recall  f1-score   support

         hit       0.47      0.50      0.49        18
      missed       0.31      0.29      0.30        14

   micro avg       0.41      0.41      0.41        32
   macro avg       0.39      0.39      0.39        32
weighted avg       0.40      0.41      0.40        32

729722
training: 46 testing: 32
hit       32
missed    14
Name: ctl_miss_h

training: 46 testing: 32
missed    28
hit       18
Name: ctl_miss_hit, dtype: int64 missed    19
hit       13
Name: ctl_miss_hit, dtype: int64
[0.57142857 0.85714286 0.42857143 0.42857143 0.5        0.66666667
 0.5       ]
Accuracy: 0.5652173913043478
              precision    recall  f1-score   support

         hit       0.44      0.44      0.44        18
      missed       0.64      0.64      0.64        28

   micro avg       0.57      0.57      0.57        46
   macro avg       0.54      0.54      0.54        46
weighted avg       0.57      0.57      0.57        46

accuracy = 0.625
              precision    recall  f1-score   support

         hit       0.55      0.46      0.50        13
      missed       0.67      0.74      0.70        19

   micro avg       0.62      0.62      0.62        32
   macro avg       0.61      0.60      0.60        32
weighted avg       0.62      0.62      0.62        32

936730
training: 45 testing: 31
hit       27
missed    18
Name: ctl_miss_hit,

In [17]:
# CORRECT SOURCE VERSUS WRONG SOURCE TRIAL CLASSIFICATION
#
# For every subject in cw_subs, a linear SVC is trained on that subject's
# whole-brain (voxel-wise) trial betas to separate 'correctsource' from
# 'wrongsource' trials. Per-fold CV accuracies, the pooled CV accuracy on the
# training set and the accuracy on the held-out test set are stored in
# cs_ws_data (one row per subject) and written to a .tsv file.

# number of cross-validation folds run within each subject's training set
n_cv_folds = 7

# layout of the results table: subject id, one accuracy per CV fold,
# pooled CV accuracy on the training set, accuracy on the test set
cs_ws_columns = (['dccid']
                 + ['CV'+str(i+1)+'_acc' for i in range(n_cv_folds)]
                 + ['TrainSet_MeanCV_acc', 'TestSet_acc'])

# One pd.Series per subject. Rows are collected here and the DataFrame is
# built once after the loop: DataFrame.append was removed in pandas 2.0 and
# re-copies the whole frame on every call.
cs_ws_rows = []

for sub in cw_subs:
    print(sub)
    s_data = [sub]
    # load subject's beta maps (one per trial)
    betas = image.load_img(img=os.path.join(beta_dir, str(sub), 'TrialContrasts/betas_sub'+str(sub)+'*.nii'),
                           wildcards=True)
    # initialize NiftiMasker object from the subject's functional brain mask
    # NOTE(review): standardize=True is applied to all trials at once, i.e.
    # before the train/test split below -- confirm this is intended
    sub_mask = nb.load(os.path.join(mask_dir, 'func_sub'+str(sub)+'_mask_stereonl.nii'))
    sub_masker = NiftiMasker(mask_img=sub_mask, standardize=True)

    # transform subject's beta maps into a 2D array of in-mask voxel values
    # (one row per trial)
    X_cs_ws_miss_ctl = sub_masker.fit_transform(betas)

    # load subject's trial labels
    labels_file = os.path.join(label_dir, 'sub-'+str(sub)+'_ctl_miss_ws_cs.tsv')
    y_cs_ws_miss_ctl = pd.read_csv(labels_file, sep='\t')
    y_cs_ws_miss_ctl_labels = y_cs_ws_miss_ctl['ctl_miss_ws_cs']
    # mask X and y data to exclude trials of no interest
    # (assumes rows of the label file are in the same order as the beta maps
    # -- TODO confirm)
    cs_ws_mask = y_cs_ws_miss_ctl_labels.isin(['correctsource', 'wrongsource'])
    y_cs_ws = y_cs_ws_miss_ctl_labels[cs_ws_mask]
    # index the numpy array with a plain boolean array rather than a Series
    X_cs_ws = X_cs_ws_miss_ctl[cs_ws_mask.values]

    # Split trials into a training and a test set
    # (no random_state is set, so the split differs on every run)
    X_train, X_test, y_train, y_test = train_test_split(
        X_cs_ws, # x
        y_cs_ws, # y
        test_size = 0.4, # 60%/40% split
        shuffle = True, # shuffle dataset before splitting
        stratify = y_cs_ws, # keep distribution of conditions consistent betw. train & test sets
        #random_state = 123  # if set number, same shuffle each time, otherwise randomization algo
        )
    print('training:', len(X_train), 'testing:', len(X_test))
    print(y_train.value_counts(), y_test.value_counts())

    # define the model
    sub_svc = SVC(kernel='linear', class_weight='balanced')

    # do cross-validation to evaluate model performance
    # within n_cv_folds folds of the training set.
    # An integer cv with a classifier uses StratifiedKFold, which ignores
    # `groups`, so no groups argument is passed.
    # predict (out-of-fold prediction for every training trial)
    y_pred = cross_val_predict(sub_svc, X_train, y_train, cv=n_cv_folds)
    # scores (one accuracy per fold)
    cv_acc = cross_val_score(sub_svc, X_train, y_train, cv=n_cv_folds)
    print(cv_acc)

    s_data.extend(cv_acc)

    # evaluate overall model performance on training data:
    # accuracy of the pooled out-of-fold predictions (stored in the
    # 'TrainSet_MeanCV_acc' column; it is not the mean of the fold scores)
    overall_acc = accuracy_score(y_pred = y_pred, y_true = y_train)
    overall_cr = classification_report(y_pred = y_pred, y_true = y_train)
    print('Accuracy:',overall_acc)
    print(overall_cr)

    s_data.append(overall_acc)

    # Test model on unseen data from the test set
    sub_svc.fit(X_train, y_train)
    y_pred = sub_svc.predict(X_test) # predict trial labels for the testing data
    acc = sub_svc.score(X_test, y_test) # get accuracy

    cr = classification_report(y_pred=y_pred, y_true=y_test) # get prec., recall & f1
    # print results
    print('accuracy =', acc)
    print(cr)

    s_data.append(acc)

    # get map of coefficients
    # coef_ = sub_svc.coef_
    # print(coef_.shape)
    #Return voxel weights into a nifti image using the NiftiMasker
    # coef_img = sub_masker.inverse_transform(coef_)
    #Save .nii to file
    # coef_img.to_filename(os.path.join(output_dir, 'Coef_maps', 'SVC_coeff_cs_ws_sub-'+str(sub)+'.nii'))

    cs_ws_rows.append(pd.Series(s_data, index=cs_ws_columns))

# assemble the per-subject rows into the results table (index 0..n_subjects-1)
cs_ws_data = pd.DataFrame(cs_ws_rows, columns=cs_ws_columns)

# Demographic / behavioural columns are inserted right after 'dccid'.
# Values are aligned on the 0..n-1 row index, so this assumes cw_data lists
# the subjects in the same order as cw_subs -- TODO confirm
demo_data = cw_data.reset_index(drop=False)

demo_columns = ['cognitive_status', 'total_scrubbed_frames', 'mean_FD',
                'hits', 'miss', 'correct_source', 'wrong_source',
                'dprime', 'associative_memScore']
for loc, col in enumerate(demo_columns, start=1):
    cs_ws_data.insert(loc = loc, column = col, value = demo_data[col], allow_duplicates=True)

cs_ws_data.to_csv(os.path.join(output_dir, 'SVC_withinSub_cs_ws_wholeBrain.tsv'),
    sep='\t', header=True, index=False)


108391
training: 37 testing: 26
correctsource    25
wrongsource      12
Name: ctl_miss_ws_cs, dtype: int64 correctsource    18
wrongsource       8
Name: ctl_miss_ws_cs, dtype: int64
[0.66666667 0.33333333 0.66666667 0.66666667 0.6        1.
 0.5       ]
Accuracy: 0.6216216216216216
               precision    recall  f1-score   support

correctsource       0.68      0.84      0.75        25
  wrongsource       0.33      0.17      0.22        12

    micro avg       0.62      0.62      0.62        37
    macro avg       0.51      0.50      0.49        37
 weighted avg       0.57      0.62      0.58        37

accuracy = 0.6538461538461539
               precision    recall  f1-score   support

correctsource       0.76      0.72      0.74        18
  wrongsource       0.44      0.50      0.47         8

    micro avg       0.65      0.65      0.65        26
    macro avg       0.60      0.61      0.61        26
 weighted avg       0.67      0.65      0.66        26

122922
training: 33 t

accuracy = 0.6296296296296297
               precision    recall  f1-score   support

correctsource       0.75      0.75      0.75        20
  wrongsource       0.29      0.29      0.29         7

    micro avg       0.63      0.63      0.63        27
    macro avg       0.52      0.52      0.52        27
 weighted avg       0.63      0.63      0.63        27

258618
training: 30 testing: 21
correctsource    20
wrongsource      10
Name: ctl_miss_ws_cs, dtype: int64 correctsource    14
wrongsource       7
Name: ctl_miss_ws_cs, dtype: int64
[0.4        0.8        0.4        0.5        0.75       0.5
 0.66666667]
Accuracy: 0.5666666666666667
               precision    recall  f1-score   support

correctsource       0.67      0.70      0.68        20
  wrongsource       0.33      0.30      0.32        10

    micro avg       0.57      0.57      0.57        30
    macro avg       0.50      0.50      0.50        30
 weighted avg       0.56      0.57      0.56        30

accuracy = 0.6190476

[0.66666667 0.66666667 0.6        0.8        1.         0.25
 0.5       ]
Accuracy: 0.6470588235294118
               precision    recall  f1-score   support

correctsource       0.74      0.80      0.77        25
  wrongsource       0.29      0.22      0.25         9

    micro avg       0.65      0.65      0.65        34
    macro avg       0.51      0.51      0.51        34
 weighted avg       0.62      0.65      0.63        34

accuracy = 0.6666666666666666
               precision    recall  f1-score   support

correctsource       0.71      0.88      0.79        17
  wrongsource       0.33      0.14      0.20         7

    micro avg       0.67      0.67      0.67        24
    macro avg       0.52      0.51      0.49        24
 weighted avg       0.60      0.67      0.62        24

427357
training: 45 testing: 30
wrongsource      26
correctsource    19
Name: ctl_miss_ws_cs, dtype: int64 wrongsource      17
correctsource    13
Name: ctl_miss_ws_cs, dtype: int64
[0.42857143 0.57142

accuracy = 0.5555555555555556
               precision    recall  f1-score   support

correctsource       0.64      0.56      0.60        16
  wrongsource       0.46      0.55      0.50        11

    micro avg       0.56      0.56      0.56        27
    macro avg       0.55      0.55      0.55        27
 weighted avg       0.57      0.56      0.56        27

567214
training: 34 testing: 24
correctsource    17
wrongsource      17
Name: ctl_miss_ws_cs, dtype: int64 correctsource    12
wrongsource      12
Name: ctl_miss_ws_cs, dtype: int64
[0.5        0.83333333 0.5        0.75       0.75       0.5
 0.5       ]
Accuracy: 0.6176470588235294
               precision    recall  f1-score   support

correctsource       0.62      0.59      0.61        17
  wrongsource       0.61      0.65      0.63        17

    micro avg       0.62      0.62      0.62        34
    macro avg       0.62      0.62      0.62        34
 weighted avg       0.62      0.62      0.62        34

accuracy = 0.5
     

[0.5        0.5        0.33333333 0.66666667 0.5        0.5
 1.        ]
Accuracy: 0.55
               precision    recall  f1-score   support

correctsource       0.50      0.44      0.47         9
  wrongsource       0.58      0.64      0.61        11

    micro avg       0.55      0.55      0.55        20
    macro avg       0.54      0.54      0.54        20
 weighted avg       0.55      0.55      0.55        20

accuracy = 0.5714285714285714
               precision    recall  f1-score   support

correctsource       0.60      0.43      0.50         7
  wrongsource       0.56      0.71      0.63         7

    micro avg       0.57      0.57      0.57        14
    macro avg       0.58      0.57      0.56        14
 weighted avg       0.58      0.57      0.56        14

778749
training: 27 testing: 19
correctsource    14
wrongsource      13
Name: ctl_miss_ws_cs, dtype: int64 wrongsource      10
correctsource     9
Name: ctl_miss_ws_cs, dtype: int64
[0.25       0.25       0.5        

training: 40 testing: 27
correctsource    23
wrongsource      17
Name: ctl_miss_ws_cs, dtype: int64 correctsource    15
wrongsource      12
Name: ctl_miss_ws_cs, dtype: int64
[0.28571429 0.57142857 0.5        0.6        0.2        0.4
 0.4       ]
Accuracy: 0.425
               precision    recall  f1-score   support

correctsource       0.50      0.43      0.47        23
  wrongsource       0.35      0.41      0.38        17

    micro avg       0.42      0.42      0.42        40
    macro avg       0.42      0.42      0.42        40
 weighted avg       0.44      0.42      0.43        40

accuracy = 0.5185185185185185
               precision    recall  f1-score   support

correctsource       0.55      0.73      0.63        15
  wrongsource       0.43      0.25      0.32        12

    micro avg       0.52      0.52      0.52        27
    macro avg       0.49      0.49      0.47        27
 weighted avg       0.50      0.52      0.49        27

936730
training: 27 testing: 19
corrects

In [18]:
# CORRECT SOURCE VERSUS MISSED TRIAL CLASSIFICATION
#
# For every subject in cmiss_subs, a linear SVC is trained on that subject's
# whole-brain (voxel-wise) trial betas to separate 'correctsource' from
# 'missed' trials. Per-fold CV accuracies, the pooled CV accuracy on the
# training set and the accuracy on the held-out test set are stored in
# cs_miss_data (one row per subject) and written to a .tsv file.

# number of cross-validation folds run within each subject's training set
n_cv_folds = 7

# layout of the results table: subject id, one accuracy per CV fold,
# pooled CV accuracy on the training set, accuracy on the test set
cs_miss_columns = (['dccid']
                   + ['CV'+str(i+1)+'_acc' for i in range(n_cv_folds)]
                   + ['TrainSet_MeanCV_acc', 'TestSet_acc'])

# One pd.Series per subject. Rows are collected here and the DataFrame is
# built once after the loop: DataFrame.append was removed in pandas 2.0 and
# re-copies the whole frame on every call.
cs_miss_rows = []

for sub in cmiss_subs:
    print(sub)
    s_data = [sub]
    # load subject's beta maps (one per trial)
    betas = image.load_img(img=os.path.join(beta_dir, str(sub), 'TrialContrasts/betas_sub'+str(sub)+'*.nii'),
                           wildcards=True)
    # initialize NiftiMasker object from the subject's functional brain mask
    # NOTE(review): standardize=True is applied to all trials at once, i.e.
    # before the train/test split below -- confirm this is intended
    sub_mask = nb.load(os.path.join(mask_dir, 'func_sub'+str(sub)+'_mask_stereonl.nii'))
    sub_masker = NiftiMasker(mask_img=sub_mask, standardize=True)

    # transform subject's beta maps into a 2D array of in-mask voxel values
    # (one row per trial)
    X_cs_ws_miss_ctl = sub_masker.fit_transform(betas)

    # load subject's trial labels
    labels_file = os.path.join(label_dir, 'sub-'+str(sub)+'_ctl_miss_ws_cs.tsv')
    y_cs_ws_miss_ctl = pd.read_csv(labels_file, sep='\t')
    y_cs_ws_miss_ctl_labels = y_cs_ws_miss_ctl['ctl_miss_ws_cs']
    # mask X and y data to exclude trials of no interest
    # (assumes rows of the label file are in the same order as the beta maps
    # -- TODO confirm)
    cs_miss_mask = y_cs_ws_miss_ctl_labels.isin(['correctsource', 'missed'])
    y_cs_miss = y_cs_ws_miss_ctl_labels[cs_miss_mask]
    # index the numpy array with a plain boolean array rather than a Series
    X_cs_miss = X_cs_ws_miss_ctl[cs_miss_mask.values]

    # Split trials into a training and a test set
    # (no random_state is set, so the split differs on every run)
    X_train, X_test, y_train, y_test = train_test_split(
        X_cs_miss, # x
        y_cs_miss, # y
        test_size = 0.4, # 60%/40% split
        shuffle = True, # shuffle dataset before splitting
        stratify = y_cs_miss, # keep distribution of conditions consistent betw. train & test sets
        #random_state = 123  # if set number, same shuffle each time, otherwise randomization algo
        )
    print('training:', len(X_train), 'testing:', len(X_test))
    print(y_train.value_counts(), y_test.value_counts())

    # define the model
    sub_svc = SVC(kernel='linear', class_weight='balanced')

    # do cross-validation to evaluate model performance
    # within n_cv_folds folds of the training set.
    # An integer cv with a classifier uses StratifiedKFold, which ignores
    # `groups`, so no groups argument is passed.
    # predict (out-of-fold prediction for every training trial)
    y_pred = cross_val_predict(sub_svc, X_train, y_train, cv=n_cv_folds)
    # scores (one accuracy per fold)
    cv_acc = cross_val_score(sub_svc, X_train, y_train, cv=n_cv_folds)
    print(cv_acc)

    s_data.extend(cv_acc)

    # evaluate overall model performance on training data:
    # accuracy of the pooled out-of-fold predictions (stored in the
    # 'TrainSet_MeanCV_acc' column; it is not the mean of the fold scores)
    overall_acc = accuracy_score(y_pred = y_pred, y_true = y_train)
    overall_cr = classification_report(y_pred = y_pred, y_true = y_train)
    print('Accuracy:',overall_acc)
    print(overall_cr)

    s_data.append(overall_acc)

    # Test model on unseen data from the test set
    sub_svc.fit(X_train, y_train)
    y_pred = sub_svc.predict(X_test) # predict trial labels for the testing data
    acc = sub_svc.score(X_test, y_test) # get accuracy

    cr = classification_report(y_pred=y_pred, y_true=y_test) # get prec., recall & f1
    # print results
    print('accuracy =', acc)
    print(cr)

    s_data.append(acc)

    # get map of coefficients
    # coef_ = sub_svc.coef_
    # print(coef_.shape)
    #Return voxel weights into a nifti image using the NiftiMasker
    # coef_img = sub_masker.inverse_transform(coef_)
    #Save .nii to file
    # coef_img.to_filename(os.path.join(output_dir, 'Coef_maps', 'SVC_coeff_cs_miss_sub-'+str(sub)+'.nii'))

    cs_miss_rows.append(pd.Series(s_data, index=cs_miss_columns))

# assemble the per-subject rows into the results table (index 0..n_subjects-1)
cs_miss_data = pd.DataFrame(cs_miss_rows, columns=cs_miss_columns)

# Demographic / behavioural columns are inserted right after 'dccid'.
# Values are aligned on the 0..n-1 row index, so this assumes cmiss_data lists
# the subjects in the same order as cmiss_subs -- TODO confirm
demo_data = cmiss_data.reset_index(drop=False)

demo_columns = ['cognitive_status', 'total_scrubbed_frames', 'mean_FD',
                'hits', 'miss', 'correct_source', 'wrong_source',
                'dprime', 'associative_memScore']
for loc, col in enumerate(demo_columns, start=1):
    cs_miss_data.insert(loc = loc, column = col, value = demo_data[col], allow_duplicates=True)

cs_miss_data.to_csv(os.path.join(output_dir, 'SVC_withinSub_cs_miss_wholeBrain.tsv'),
                    sep='\t', header=True, index=False)


108391
training: 34 testing: 24
correctsource    25
missed            9
Name: ctl_miss_ws_cs, dtype: int64 correctsource    18
missed            6
Name: ctl_miss_ws_cs, dtype: int64
[0.66666667 0.66666667 0.6        0.4        0.75       1.
 0.5       ]
Accuracy: 0.6470588235294118
               precision    recall  f1-score   support

correctsource       0.72      0.84      0.78        25
       missed       0.20      0.11      0.14         9

    micro avg       0.65      0.65      0.65        34
    macro avg       0.46      0.48      0.46        34
 weighted avg       0.59      0.65      0.61        34

accuracy = 0.625
               precision    recall  f1-score   support

correctsource       0.76      0.72      0.74        18
       missed       0.29      0.33      0.31         6

    micro avg       0.62      0.62      0.62        24
    macro avg       0.53      0.53      0.53        24
 weighted avg       0.64      0.62      0.63        24

122922
training: 28 testing: 20
co

accuracy = 0.64
               precision    recall  f1-score   support

correctsource       0.71      0.75      0.73        16
       missed       0.50      0.44      0.47         9

    micro avg       0.64      0.64      0.64        25
    macro avg       0.60      0.60      0.60        25
 weighted avg       0.63      0.64      0.63        25

314409
training: 40 testing: 28
correctsource    21
missed           19
Name: ctl_miss_ws_cs, dtype: int64 correctsource    14
missed           14
Name: ctl_miss_ws_cs, dtype: int64
[0.5        0.16666667 0.83333333 0.83333333 0.5        0.6
 0.4       ]
Accuracy: 0.55
               precision    recall  f1-score   support

correctsource       0.58      0.52      0.55        21
       missed       0.52      0.58      0.55        19

    micro avg       0.55      0.55      0.55        40
    macro avg       0.55      0.55      0.55        40
 weighted avg       0.55      0.55      0.55        40

accuracy = 0.6428571428571429
               pre

accuracy = 0.8
               precision    recall  f1-score   support

correctsource       0.81      0.96      0.88        23
       missed       0.67      0.29      0.40         7

    micro avg       0.80      0.80      0.80        30
    macro avg       0.74      0.62      0.64        30
 weighted avg       0.78      0.80      0.77        30

458807
training: 36 testing: 24
correctsource    21
missed           15
Name: ctl_miss_ws_cs, dtype: int64 correctsource    14
missed           10
Name: ctl_miss_ws_cs, dtype: int64
[0.5 0.6 0.8 0.4 0.6 0.6 0.6]
Accuracy: 0.5833333333333334
               precision    recall  f1-score   support

correctsource       0.62      0.76      0.68        21
       missed       0.50      0.33      0.40        15

    micro avg       0.58      0.58      0.58        36
    macro avg       0.56      0.55      0.54        36
 weighted avg       0.57      0.58      0.56        36

accuracy = 0.5833333333333334
               precision    recall  f1-score   s

accuracy = 0.44
               precision    recall  f1-score   support

correctsource       0.20      0.09      0.13        11
       missed       0.50      0.71      0.59        14

    micro avg       0.44      0.44      0.44        25
    macro avg       0.35      0.40      0.36        25
 weighted avg       0.37      0.44      0.38        25

729722
training: 40 testing: 28
correctsource    26
missed           14
Name: ctl_miss_ws_cs, dtype: int64 correctsource    18
missed           10
Name: ctl_miss_ws_cs, dtype: int64
[0.5        0.5        0.66666667 0.33333333 0.66666667 0.6
 0.2       ]
Accuracy: 0.5
               precision    recall  f1-score   support

correctsource       0.59      0.73      0.66        26
       missed       0.12      0.07      0.09        14

    micro avg       0.50      0.50      0.50        40
    macro avg       0.36      0.40      0.37        40
 weighted avg       0.43      0.50      0.46        40

accuracy = 0.6071428571428571
               prec

[0.66666667 0.5        0.66666667 0.5        0.4        0.6
 0.75      ]
Accuracy: 0.5789473684210527
               precision    recall  f1-score   support

correctsource       0.57      0.44      0.50        18
       missed       0.58      0.70      0.64        20

    micro avg       0.58      0.58      0.58        38
    macro avg       0.58      0.57      0.57        38
 weighted avg       0.58      0.58      0.57        38

accuracy = 0.6153846153846154
               precision    recall  f1-score   support

correctsource       0.58      0.85      0.69        13
       missed       0.71      0.38      0.50        13

    micro avg       0.62      0.62      0.62        26
    macro avg       0.65      0.62      0.59        26
 weighted avg       0.65      0.62      0.59        26

983291
training: 40 testing: 28
correctsource    29
missed           11
Name: ctl_miss_ws_cs, dtype: int64 correctsource    21
missed            7
Name: ctl_miss_ws_cs, dtype: int64
[0.71428571 0.333333