In [43]:
%matplotlib inline

Between-subject SVR (Support Vector Regression) prediction of a latent "memory" score, computed with PCA on a battery of neuropsych test scores, based on the fMRI contrast between task conditions (CIMAQ memory encoding task), whose beta weights are averaged within networks from different grains of the MIST parcellation. 

Mean network betas reflect the engagement of a particular network for each trial. 

MIST Parcellations include: 7, 12, 20, 36, 64, 122, 197, 325, 444 networks

Predictions are based on a single brain map of beta weights per participant that
reflects the contrast between task conditions (each network's mean beta is a feature):

- encoding task versus control task
- hit versus miss trials
- correct versus incorrect source trials
- correct source versus miss trials

Only contrast maps from participants who had at least 10 trials in each contrasted condition
are included in each analysis (more stable and representative contrasts). 

In [44]:
import os
import sys
import glob
import numpy as np
import pandas as pd
import nilearn
import scipy
import nibabel as nb
import sklearn
import seaborn as sns
import itertools

from numpy import nan as NaN
from matplotlib import pyplot as plt
from nilearn import image, plotting
from nilearn import masking
from nilearn import plotting
from nilearn import datasets
from nilearn.plotting import plot_stat_map, plot_roi, plot_anat, plot_img, show
from nilearn.input_data import NiftiMasker, NiftiLabelsMasker
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC, LinearSVC, SVR, LinearSVR
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, f1_score
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.preprocessing import MinMaxScaler


Step 1: import list of participants, and generate sublists of participants who have enough trials per category to have proper contrasts between conditions 

1. Encoding vs Control tasks contrast (all 94 participants)

3. Hit versus Miss contrast (61 participants; at least 10 trials per condition)

4. Correct Source versus Wrong Source (83 participants; at least 10 trials per condition)

5. Correct Source versus Miss (59 participants; at least 10 trials per condition)


In [45]:
# Load the participant table (tab-separated, one row per participant)
data_file = '/Users/mombot/Documents/Simexp/CIMAQ/Data/Participants/Splitting/Sub_list.tsv'
sub_data = pd.read_csv(data_file, sep = '\t')

# Drop every participant whose quality-control status is 'F' (failed)
passed_qc = sub_data['QC_status'] != 'F'
sub_data = sub_data[passed_qc]

# Trial-count cutoff: a condition is usable when it has MORE than `num`
# trials (for integer counts: at least 10 trials)
num = 9

# --- Encoding vs Control (and stimulus category): every QC-passed participant ---
all_subs = sub_data['participant_id']
all_diagnosis = sub_data['cognitive_status']
all_memScore = sub_data['Fac1_memory']
print(all_subs)
print(len(all_subs))

# --- Hit vs Miss: enough hit trials AND enough miss trials ---
enough_hits = sub_data['hits'] > num
enough_misses = sub_data['miss'] > num
hm_data = sub_data[enough_hits & enough_misses]
hm_subs = hm_data['participant_id']
hm_diagnosis = hm_data['cognitive_status']
hm_memScore = hm_data['Fac1_memory']
print(hm_subs)
print(len(hm_subs))

# --- Correct Source vs Wrong Source: enough trials of both source outcomes ---
enough_correct = sub_data['correct_source'] > num
enough_wrong = sub_data['wrong_source'] > num
cw_data = sub_data[enough_correct & enough_wrong]
cw_subs = cw_data['participant_id']
cw_diagnosis = cw_data['cognitive_status']
cw_memScore = cw_data['Fac1_memory']
print(cw_subs)
print(len(cw_subs))

# --- Correct Source vs Miss: enough correct-source AND enough miss trials ---
cmiss_data = sub_data[enough_correct & enough_misses]
cmiss_subs = cmiss_data['participant_id']
cmiss_diagnosis = cmiss_data['cognitive_status']
cmiss_memScore = cmiss_data['Fac1_memory']
print(cmiss_subs)
print(len(cmiss_subs))


0      108391
1      120839
2      122922
3      127228
4      139593
6      147863
7      150649
8      164965
9      175295
10     178101
11     189005
12     197192
14     199801
15     219637
16     229301
17     247659
18     254402
19     255499
20     258618
21     258912
22     267168
23     270218
24     271596
27     314409
28     326073
29     336665
30     337021
31     350555
32     370092
34     385370
        ...  
70     763590
71     778749
72     783781
73     785217
74     785245
75     804743
77     845675
78     866812
79     878354
80     884343
81     886007
83     893978
85     901551
86     906145
87     914042
88     915022
89     920577
90     932933
91     936730
92     938001
93     955548
94     956049
95     956130
96     968913
97     974246
98     979001
99     983291
100    988602
101    996599
102    998166
Name: participant_id, Length: 94, dtype: int64
94
0      108391
2      122922
4      139593
6      147863
7      150649
8      164965
14     19980

Step 2: For each subject list (analysis), create a group mask from individual functional mri masks.

The mask should only include voxels included in all participants' individual functional masks (intersection). The mask will serve to vectorize 3D beta weight maps into feature rows.
**Update: use a 0.5 threshold, otherwise there is too much signal dropout**


In [46]:
# Anatomical template used as the background image when displaying masks
anat = '/Users/mombot/Documents/Simexp/CIMAQ/Data/Templates/template_anat_stereo.nii'

# Directory holding one functional brain mask per participant
mask_dir = '/Users/mombot/Documents/Simexp/CIMAQ/Data/masks'


def build_group_mask(subjects, threshold=0.5):
    """Combine the subjects' individual functional masks into one group mask.

    Parameters
    ----------
    subjects : iterable
        Participant ids. Each id is inserted into the file name
        'func_sub<ID>_mask_stereonl.nii', looked up inside `mask_dir`.
    threshold : float, optional
        Forwarded to nilearn's `masking.intersect_masks`. Per the nilearn
        documentation, 1 yields the strict intersection and 0 the union of
        the masks; 0.5 is used here to limit signal drop-out.

    Returns
    -------
    mask_list : list of str
        Paths to the individual mask files, in the order of `subjects`.
    group_mask : Nifti image
        Group mask returned by `masking.intersect_masks` (connected=True).
    """
    mask_list = [os.path.join(mask_dir, 'func_sub'+str(sub)+'_mask_stereonl.nii')
                 for sub in subjects]
    # Report how many individual masks go into the group mask
    print(len(mask_list))
    group_mask = masking.intersect_masks(mask_imgs=mask_list, threshold=threshold,
                                         connected=True)
    return mask_list, group_mask


# One group mask per analysis, each built from that analysis' own subject list
# (the number of participants per analysis is printed by build_group_mask).

# Encoding vs Control: all participants
all_mask_list, grp_mask_all = build_group_mask(all_subs)

# Hit versus Miss
hm_mask_list, grp_mask_hm = build_group_mask(hm_subs)

# Correct Source versus Wrong Source
cw_mask_list, grp_mask_cw = build_group_mask(cw_subs)

# Correct Source versus Miss
cmiss_mask_list, grp_mask_cmiss = build_group_mask(cmiss_subs)

# Interactive view of one group mask over the anatomical template.
# To inspect another analysis, swap in grp_mask_all / grp_mask_hm / grp_mask_cw;
# for a static figure use plotting.plot_roi(roi_img=<mask>, bg_img=anat,
# cut_coords=(0, -7, -7), cmap='Paired').
plotting.view_img(grp_mask_cmiss, bg_img=anat, resampling_interpolation='nearest')


94
61
83
59


Step 3: For each categorization, randomly assign and split participants into a training set and a test set. Within the same step, also assign the corresponding neuropsych score to that same set. 

Note: stratify to maintain comparable proportions of Cognitively Normal (Controls), Subjective Cognitive Disorder (SCD) and Mild Cognitive Impairment (MCI) participants between the testing and training sets.


In [47]:

# Options shared by all four splits: 60% train / 40% test, shuffled first.
# Every split is stratified on cognitive status so that the proportions of
# Controls, SCDs and MCIs stay comparable between the two sets; each analysis
# keeps its own fixed random seed.
split_opts = dict(test_size = 0.4, shuffle = True)


# Encoding vs Control Task Conditions
enc_ctl_train, enc_ctl_test, y_enc_ctl_train, y_enc_ctl_test = train_test_split(
    all_subs,       # subjects to split
    all_memScore,   # matching memory scores, split the same way
    stratify = all_diagnosis,
    random_state = 123,
    **split_opts)

print('enc_ctl training subjects:', len(enc_ctl_train),
      'enc_ctl training scores:', len(y_enc_ctl_train),
      'enc_ctl testing subjects:', len(enc_ctl_test),
      'enc_ctl testing scores:', len(y_enc_ctl_test))


# Hit vs Miss Trials
hit_miss_train, hit_miss_test, y_hit_miss_train, y_hit_miss_test = train_test_split(
    hm_subs,        # subjects to split
    hm_memScore,    # matching memory scores, split the same way
    stratify = hm_diagnosis,
    random_state = 52,
    **split_opts)

print('hit_miss training subjects:', len(hit_miss_train),
      'hit_miss training scores:', len(y_hit_miss_train),
      'hit_miss testing subjects:', len(hit_miss_test),
      'hit_miss testing scores:', len(y_hit_miss_test))


# Correct Source vs Wrong Source Trials
cs_ws_train, cs_ws_test, y_cs_ws_train, y_cs_ws_test = train_test_split(
    cw_subs,        # subjects to split
    cw_memScore,    # matching memory scores, split the same way
    stratify = cw_diagnosis,
    random_state = 46,
    **split_opts)

print('cs_ws training subjects:', len(cs_ws_train),
      'cs_ws training scores:', len(y_cs_ws_train),
      'cs_ws testing subjects:', len(cs_ws_test),
      'cs_ws testing scores:', len(y_cs_ws_test))


# Correct Source vs Miss Trials
cs_miss_train, cs_miss_test, y_cs_miss_train, y_cs_miss_test = train_test_split(
    cmiss_subs,     # subjects to split
    cmiss_memScore, # matching memory scores, split the same way
    stratify = cmiss_diagnosis,
    random_state = 103,
    **split_opts)

print('cs_miss training subjects:', len(cs_miss_train),
      'cs_miss training scores:', len(y_cs_miss_train),
      'cs_miss testing subjects:', len(cs_miss_test),
      'cs_miss testing scores:', len(y_cs_miss_test))


enc_ctl training subjects: 56 enc_ctl training scores: 56 enc_ctl testing subjects: 38 enc_ctl testing scores: 38
hit_miss training subjects: 36 hit_miss training scores: 36 hit_miss testing subjects: 25 hit_miss testing scores: 25
cs_ws training subjects: 49 cs_ws training scores: 49 cs_ws testing subjects: 34 cs_ws testing scores: 34
cs_miss training subjects: 35 cs_miss training scores: 35 cs_miss testing subjects: 24 cs_miss testing scores: 24


Step 4: Build training and testing feature matrices

Note: 
Trying different grains of segmentation: 7, 20, 64, 325, 444 networks

ALSO: 
Import MIST network labels to identify and interpret features
For each participant:
- With nilearn's NiftiLabelsMasker, extract the average beta weight from a single 3D beta map of the task contrast (computed in Nistats, 1st level model) within each labelled network, and output a matrix of network weights
- concatenate the participant's network features into a single matrix per set (train and test) per analysis.
Here, a single row of features per participant

Note: 
Masking: using a group mask built from the intersection of normalized functional MRI data masks (outputted by NIAK), to determine which voxels to include in the final data matrix

ALSO: 
Import MIST network labels to identify and interpret features

In [67]:
# Directory with the per-subject beta maps of the task contrasts
beta_dir = '/Users/mombot/Documents/Simexp/CIMAQ/Data/Nistats/Betas'

# Parcellation resolution: number of MIST networks used to build the features
numnet = 325

# Load the MIST parcellation map at that resolution
basc_dir = '/Users/mombot/Documents/Simexp/CIMAQ/Data/MIST/Release/Parcellations'
basc_file = os.path.join(basc_dir, 'MIST_'+str(numnet)+'.nii')
basc = image.load_img(basc_file)

# Load the parcel information table for the same resolution (';'-separated file)
b_labels = '/Users/mombot/Documents/Simexp/CIMAQ/Data/MIST/Release/Parcel_Information/MIST_'+str(numnet)+'.csv'
basc_labels = pd.read_csv(b_labels, sep=';')


In [68]:
# ENCODING VERSUS CONTROL TASK CONTRAST

# Masker that averages a beta map within each MIST network, restricted to the
# group mask of this analysis. In the resulting matrix every subject is a row
# and every network a column.
# NOTE(review): standardize=True is applied on each call below, i.e. separately
# to the training and to the test matrix -- confirm this is intended.
label_masker_all = NiftiLabelsMasker(labels_img=basc, mask_img=grp_mask_all,
                                     standardize=True, memory = 'nilearn_cache',
                                     verbose=0)

# TRAINING SET

# Collect the contrast beta map of every training subject, in split order
subtrain_fileList = []
for j, sub in enumerate(enc_ctl_train, start=1):
    print('subject '+str(j)+' : '+str(sub))
    beta = os.path.join(beta_dir, str(sub), 'TaskContrasts/betas_sub'+str(sub)+'_enc_minus_ctl.nii')
    subtrain_fileList.append(beta)

# Stack the 3D maps into one 4D image (4th dimension = subjects)
subtrain_betas = nb.funcs.concat_images(images=subtrain_fileList, check_affines=True, axis=None)

# Reduce the 4D image to a 2D array of network-averaged betas
X_enc_ctl_train = label_masker_all.fit_transform(subtrain_betas)

n_rows, n_cols = X_enc_ctl_train.shape[0], X_enc_ctl_train.shape[1]
print('number of X filled rows: ', n_rows,
     '\nnumber of X filled columns: ', n_cols)

print('The training data set is built!')


# TESTING SET

# Same procedure for the held-out subjects
subtest_fileList = []
for j, sub in enumerate(enc_ctl_test, start=1):
    print('subject '+str(j)+' : '+str(sub))
    beta = os.path.join(beta_dir, str(sub), 'TaskContrasts/betas_sub'+str(sub)+'_enc_minus_ctl.nii')
    subtest_fileList.append(beta)

# Stack the 3D maps into one 4D image (4th dimension = subjects)
subtest_betas = nb.funcs.concat_images(images=subtest_fileList, check_affines=True, axis=None)

# Reduce the 4D image to a 2D array of network-averaged betas
X_enc_ctl_test = label_masker_all.fit_transform(subtest_betas)

n_rows, n_cols = X_enc_ctl_test.shape[0], X_enc_ctl_test.shape[1]
print('number of X filled rows: ', n_rows,
     '\nnumber of X filled columns: ', n_cols)

print('The test data set is built!')


subject 1 : 878354
subject 2 : 955548
subject 3 : 983291
subject 4 : 968913
subject 5 : 956049
subject 6 : 893978
subject 7 : 258912
subject 8 : 628299
subject 9 : 711830
subject 10 : 267168
subject 11 : 543589
subject 12 : 484204
subject 13 : 668786
subject 14 : 748676
subject 15 : 549994
subject 16 : 845675
subject 17 : 974246
subject 18 : 189005
subject 19 : 520377
subject 20 : 408506
subject 21 : 439776
subject 22 : 490035
subject 23 : 739694
subject 24 : 326073
subject 25 : 785217
subject 26 : 920577
subject 27 : 270218
subject 28 : 785245
subject 29 : 437101
subject 30 : 254402
subject 31 : 219637
subject 32 : 988602
subject 33 : 597569
subject 34 : 258618
subject 35 : 413474
subject 36 : 139593
subject 37 : 936730
subject 38 : 459801
subject 39 : 783781
subject 40 : 998166
subject 41 : 914042
subject 42 : 386333
subject 43 : 127228
subject 44 : 763590
subject 45 : 884343
subject 46 : 652850
subject 47 : 956130
subject 48 : 502616
subject 49 : 147863
subject 50 : 175295
subject 5

In [69]:

# HIT VERSUS MISS TRIALS CONTRAST

# Masker that averages a beta map within each MIST network, restricted to the
# group mask of this analysis. In the resulting matrix every subject is a row
# and every network a column.
# NOTE(review): standardize=True is applied on each call below, i.e. separately
# to the training and to the test matrix -- confirm this is intended.
label_masker_hm = NiftiLabelsMasker(labels_img=basc, standardize=True, mask_img=grp_mask_hm,
                                     memory = 'nilearn_cache', verbose=0)

# This analysis is the HIT minus MISS contrast (its subjects were selected for
# having enough hit AND enough miss trials), so the hit_minus_miss maps are
# loaded here rather than the hit_minus_ctl maps.
# NOTE(review): assumes the first-level outputs contain
# 'betas_sub<ID>_hit_minus_miss.nii', following the naming of the other
# contrasts (e.g. '_cs_minus_miss.nii') -- confirm the files exist.

# TRAINING SET

# Collect the contrast beta map of every training subject, in split order
subtrain_fileList = []
for j, sub in enumerate(hit_miss_train, start=1):
    print('subject '+str(j)+' : '+str(sub))
    # path to the subject's hit-minus-miss beta map
    beta = os.path.join(beta_dir, str(sub), 'TaskContrasts/betas_sub'+str(sub)+'_hit_minus_miss.nii')
    subtrain_fileList.append(beta)

# concatenate subjects' beta maps into a single 4D file (4th dimension = subjects)
subtrain_betas = nb.funcs.concat_images(images=subtrain_fileList, check_affines=True, axis=None)

# reduce the 4D image to a 2D numpy array of network-averaged betas
X_hit_miss_train = label_masker_hm.fit_transform(subtrain_betas)

print('number of X filled rows: ', X_hit_miss_train.shape[0],
     '\nnumber of X filled columns: ', X_hit_miss_train.shape[1])

print('The training data set is built!')


# TESTING SET

# Same procedure for the held-out subjects
subtest_fileList = []
for j, sub in enumerate(hit_miss_test, start=1):
    print('subject '+str(j)+' : '+str(sub))
    # path to the subject's hit-minus-miss beta map
    beta = os.path.join(beta_dir, str(sub), 'TaskContrasts/betas_sub'+str(sub)+'_hit_minus_miss.nii')
    subtest_fileList.append(beta)

# concatenate subjects' beta maps into a single 4D file (4th dimension = subjects)
subtest_betas = nb.funcs.concat_images(images=subtest_fileList, check_affines=True, axis=None)

# reduce the 4D image to a 2D numpy array of network-averaged betas
X_hit_miss_test = label_masker_hm.fit_transform(subtest_betas)

print('number of X filled rows: ', X_hit_miss_test.shape[0],
     '\nnumber of X filled columns: ', X_hit_miss_test.shape[1])

print('The test data set is built!')


subject 1 : 408506
subject 2 : 979001
subject 3 : 164965
subject 4 : 974246
subject 5 : 219637
subject 6 : 932933
subject 7 : 920577
subject 8 : 459801
subject 9 : 441008
subject 10 : 652850
subject 11 : 490035
subject 12 : 258912
subject 13 : 370092
subject 14 : 337021
subject 15 : 139593
subject 16 : 785245
subject 17 : 396250
subject 18 : 711830
subject 19 : 439776
subject 20 : 255499
subject 21 : 915022
subject 22 : 458807
subject 23 : 199801
subject 24 : 996599
subject 25 : 936730
subject 26 : 336665
subject 27 : 413474
subject 28 : 729722
subject 29 : 956130
subject 30 : 267168
subject 31 : 437101
subject 32 : 484204
subject 33 : 147863
subject 34 : 884343
subject 35 : 247659
subject 36 : 763590
number of X filled rows:  36 
number of X filled columns:  325
The training data set is built!
subject 1 : 748676
subject 2 : 258618
subject 3 : 271596
subject 4 : 597569
subject 5 : 403131
subject 6 : 122922
subject 7 : 677561
subject 8 : 314409
subject 9 : 270218
subject 10 : 739694
sub

In [70]:
# CORRECT SOURCE VERSUS WRONG SOURCE TRIALS CONTRAST

# Masker that averages a beta map within each MIST network, restricted to the
# group mask of this analysis. In the resulting matrix every subject is a row
# and every network a column.
# NOTE(review): standardize=True is applied on each call below, i.e. separately
# to the training and to the test matrix -- confirm this is intended.
label_masker_cw = NiftiLabelsMasker(labels_img=basc, mask_img=grp_mask_cw,
                                    standardize=True, memory = 'nilearn_cache',
                                    verbose=0)

# TRAINING SET

# Collect the contrast beta map of every training subject, in split order
subtrain_fileList = []
for j, sub in enumerate(cs_ws_train, start=1):
    print('subject '+str(j)+' : '+str(sub))
    beta = os.path.join(beta_dir, str(sub), 'TaskContrasts/betas_sub'+str(sub)+'_cs_minus_ws.nii')
    subtrain_fileList.append(beta)

# Stack the 3D maps into one 4D image (4th dimension = subjects)
subtrain_betas = nb.funcs.concat_images(images=subtrain_fileList, check_affines=True, axis=None)

# Reduce the 4D image to a 2D array of network-averaged betas
X_cs_ws_train = label_masker_cw.fit_transform(subtrain_betas)

n_rows, n_cols = X_cs_ws_train.shape[0], X_cs_ws_train.shape[1]
print('number of X filled rows: ', n_rows,
     '\nnumber of X filled columns: ', n_cols)

print('The training data set is built!')


# TESTING SET

# Same procedure for the held-out subjects
subtest_fileList = []
for j, sub in enumerate(cs_ws_test, start=1):
    print('subject '+str(j)+' : '+str(sub))
    beta = os.path.join(beta_dir, str(sub), 'TaskContrasts/betas_sub'+str(sub)+'_cs_minus_ws.nii')
    subtest_fileList.append(beta)

# Stack the 3D maps into one 4D image (4th dimension = subjects)
subtest_betas = nb.funcs.concat_images(images=subtest_fileList, check_affines=True, axis=None)

# Reduce the 4D image to a 2D array of network-averaged betas
X_cs_ws_test = label_masker_cw.fit_transform(subtest_betas)

n_rows, n_cols = X_cs_ws_test.shape[0], X_cs_ws_test.shape[1]
print('number of X filled rows: ', n_rows,
     '\nnumber of X filled columns: ', n_cols)

print('The test data set is built!')



subject 1 : 652850
subject 2 : 127228
subject 3 : 893978
subject 4 : 462345
subject 5 : 258618
subject 6 : 549994
subject 7 : 785245
subject 8 : 441008
subject 9 : 370092
subject 10 : 668786
subject 11 : 413474
subject 12 : 229301
subject 13 : 219637
subject 14 : 247659
subject 15 : 938001
subject 16 : 543589
subject 17 : 350555
subject 18 : 427357
subject 19 : 337021
subject 20 : 197192
subject 21 : 150649
subject 22 : 998166
subject 23 : 175295
subject 24 : 968913
subject 25 : 326073
subject 26 : 932933
subject 27 : 502616
subject 28 : 408506
subject 29 : 778749
subject 30 : 763590
subject 31 : 458807
subject 32 : 739694
subject 33 : 914042
subject 34 : 711830
subject 35 : 729722
subject 36 : 139593
subject 37 : 314409
subject 38 : 484204
subject 39 : 983291
subject 40 : 886007
subject 41 : 122922
subject 42 : 490035
subject 43 : 267168
subject 44 : 677561
subject 45 : 785217
subject 46 : 878354
subject 47 : 597569
subject 48 : 996599
subject 49 : 120839
number of X filled rows:  49 

In [71]:
# CORRECT SOURCE VERSUS MISS TRIALS CONTRAST

# Masker that averages a beta map within each MIST network, restricted to the
# group mask of this analysis. In the resulting matrix every subject is a row
# and every network a column.
# NOTE(review): standardize=True is applied on each call below, i.e. separately
# to the training and to the test matrix -- confirm this is intended.
label_masker_cmiss = NiftiLabelsMasker(labels_img=basc, mask_img=grp_mask_cmiss,
                                       standardize=True, memory = 'nilearn_cache',
                                       verbose=0)


# TRAINING SET

# Collect the contrast beta map of every training subject, in split order
subtrain_fileList = []
for j, sub in enumerate(cs_miss_train, start=1):
    print('subject '+str(j)+' : '+str(sub))
    beta = os.path.join(beta_dir, str(sub), 'TaskContrasts/betas_sub'+str(sub)+'_cs_minus_miss.nii')
    subtrain_fileList.append(beta)

# Stack the 3D maps into one 4D image (4th dimension = subjects)
subtrain_betas = nb.funcs.concat_images(images=subtrain_fileList, check_affines=True, axis=None)

# Reduce the 4D image to a 2D array of network-averaged betas
X_cs_miss_train = label_masker_cmiss.fit_transform(subtrain_betas)

n_rows, n_cols = X_cs_miss_train.shape[0], X_cs_miss_train.shape[1]
print('number of X filled rows: ', n_rows,
     '\nnumber of X filled columns: ', n_cols)

print('The training data set is built!')


# TESTING SET

# Same procedure for the held-out subjects
subtest_fileList = []
for j, sub in enumerate(cs_miss_test, start=1):
    print('subject '+str(j)+' : '+str(sub))
    beta = os.path.join(beta_dir, str(sub), 'TaskContrasts/betas_sub'+str(sub)+'_cs_minus_miss.nii')
    subtest_fileList.append(beta)

# Stack the 3D maps into one 4D image (4th dimension = subjects)
subtest_betas = nb.funcs.concat_images(images=subtest_fileList, check_affines=True, axis=None)

# Reduce the 4D image to a 2D array of network-averaged betas
X_cs_miss_test = label_masker_cmiss.fit_transform(subtest_betas)

n_rows, n_cols = X_cs_miss_test.shape[0], X_cs_miss_test.shape[1]
print('number of X filled rows: ', n_rows,
     '\nnumber of X filled columns: ', n_cols)

print('The test data set is built!')


subject 1 : 150649
subject 2 : 108391
subject 3 : 748676
subject 4 : 652850
subject 5 : 271596
subject 6 : 484204
subject 7 : 956130
subject 8 : 314409
subject 9 : 255499
subject 10 : 893978
subject 11 : 936730
subject 12 : 555537
subject 13 : 164965
subject 14 : 199801
subject 15 : 597569
subject 16 : 932933
subject 17 : 219637
subject 18 : 403131
subject 19 : 459801
subject 20 : 247659
subject 21 : 979001
subject 22 : 677561
subject 23 : 983291
subject 24 : 258618
subject 25 : 370092
subject 26 : 996599
subject 27 : 956049
subject 28 : 441008
subject 29 : 270218
subject 30 : 785245
subject 31 : 739694
subject 32 : 906145
subject 33 : 267168
subject 34 : 396250
subject 35 : 884343
number of X filled rows:  35 
number of X filled columns:  325
The training data set is built!
subject 1 : 439776
subject 2 : 122922
subject 3 : 783781
subject 4 : 763590
subject 5 : 974246
subject 6 : 139593
subject 7 : 408506
subject 8 : 886007
subject 9 : 458807
subject 10 : 385370
subject 11 : 711830
sub

In [75]:
# Choose which analysis feeds the Support Vector Regression model.
# Available (features, scores) pairs, one training and one testing pair each:
#
#   ENCODING vs CONTROL condition
#       train: X_enc_ctl_train, y_enc_ctl_train
#       test:  X_enc_ctl_test,  y_enc_ctl_test
#
#   HIT vs MISS trials
#       train: X_hit_miss_train, y_hit_miss_train
#       test:  X_hit_miss_test,  y_hit_miss_test
#
#   CORRECT SOURCE vs WRONG SOURCE trials
#       train: X_cs_ws_train, y_cs_ws_train
#       test:  X_cs_ws_test,  y_cs_ws_test
#
#   CORRECT SOURCE vs MISS trials
#       train: X_cs_miss_train, y_cs_miss_train
#       test:  X_cs_miss_test,  y_cs_miss_test

# Currently selected: CORRECT SOURCE vs WRONG SOURCE
X_train, y_train = X_cs_ws_train, y_cs_ws_train
X_test, y_test = X_cs_ws_test, y_cs_ws_test


In [76]:
# Define the regression model: a Support Vector Regressor with a linear
# kernel; every other hyper-parameter keeps its scikit-learn default.
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html
memo_svr = SVR(kernel='linear')

# Display the model together with its full parameter set
print(memo_svr)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)


In [77]:
# Cross-validate the SVR on the training set only (the test set is untouched).
# The model is not fitted here directly (no memo_svr.fit call): the
# cross_val_* helpers are given the estimator and the training data and
# handle fitting per fold themselves.

# Number of cross-validation folds (an integer `cv` is passed to scikit-learn;
# see its docs for the splitter this selects for a regressor).
n_folds = 10

# No `groups` argument is passed: it is only meaningful for group-aware
# splitters, and the continuous memory score is not a grouping variable.

# Out-of-fold prediction of the memory score for every training subject
y_pred = cross_val_predict(memo_svr, X_train, y_train, cv=n_folds)

# Per-fold score. With no `scoring` given, the estimator's own .score() is
# used, which for a regressor is R^2 (coefficient of determination) and not
# an accuracy. R^2 is at most 1; negative values mean the model does worse
# than always predicting the mean score.
r2_scores = cross_val_score(memo_svr, X_train, y_train, cv=n_folds)
acc = r2_scores  # previous name, kept in case other cells still refer to `acc`

# Report the R^2 obtained on each fold of the cross-validation
for fold, r2 in enumerate(r2_scores):
    print('Fold %s -- R2 = %s'%(fold, r2))

Fold 0 -- Acc = -1.5689236333459613
Fold 1 -- Acc = -0.5775701059852911
Fold 2 -- Acc = -3.254108886802393
Fold 3 -- Acc = -1.272783944819924
Fold 4 -- Acc = -2.95698585342798
Fold 5 -- Acc = -4.699251730175821
Fold 6 -- Acc = -4.555225089387556
Fold 7 -- Acc = -2.1194979862076653
Fold 8 -- Acc = -0.4306273550581172
Fold 9 -- Acc = -1.173475979894282
