In [1]:
import pandas as pd
import numpy as np
import os
import nltools as nlt
import nilearn as nil
import nibabel as nib
import warnings
import glob
import random
import pickle
import dev_wtp_io_utils
import gc #garbage collection
from nilearn import plotting
from dev_wtp_io_utils import cv_train_test_sets, asizeof_fmt
from sklearn.model_selection import KFold,GroupKFold
import os, warnings

  warn("Fetchers from the nilearn.datasets module will be "


In [2]:
pd.set_option('display.max_rows', 99)

In this doc we're going to try to do some nested CV.

Based on data in `wtp_train_avg_test_ind-comparison-size.ipynb`, I'll use 30 subjects and 3-fold CV. This seems to have a fairly low error variance in results relative to larger CVs (because there's more data in each test fold).

### Load brain data

In [3]:
test_train_set = pd.read_csv("../data/train_test_markers_20210601T183243.csv")

In [4]:
with open('../data/Brain_Data_2sns_60subs.pkl', 'rb') as pkl_file:
    Brain_Data_allsubs = pickle.load(pkl_file)
    
dev_wtp_io_utils.check_BD_against_test_train_set(Brain_Data_allsubs,test_train_set)

checked for intersection and no intersection between the brain data and the subjects was found.
there were 60 subjects overlapping between the subjects marked for train data and the training dump file itself.


In [5]:
with open('../data/Brain_Data_2sns_60subs_grouped.pkl', 'rb') as pkl_file:
    Brain_Data_allsubs_grouped = pickle.load(pkl_file)
    
dev_wtp_io_utils.check_BD_against_test_train_set(Brain_Data_allsubs_grouped,test_train_set)

checked for intersection and no intersection between the brain data and the subjects was found.
there were 60 subjects overlapping between the subjects marked for train data and the training dump file itself.


### Preprocess

In [6]:
Brain_Data_allsubs.Y = Brain_Data_allsubs.X.response.copy()
print(Brain_Data_allsubs.Y.value_counts())
Brain_Data_allsubs.Y[Brain_Data_allsubs.Y=='NULL']=None
print(Brain_Data_allsubs.Y.value_counts())
print(Brain_Data_allsubs.Y.isnull().value_counts())
Brain_Data_allsubs_nn = Brain_Data_allsubs[Brain_Data_allsubs.Y.isnull()==False]
print(len(Brain_Data_allsubs_nn))
print(len(Brain_Data_allsubs))

5.0    1164
6.0    1018
7.0     904
8.0     604
Name: response, dtype: int64
5.0    1164
6.0    1018
7.0     904
8.0     604
Name: response, dtype: int64
False    3690
True      150
Name: response, dtype: int64
3690
3840


In [7]:
all_subs_nn_nifti = Brain_Data_allsubs_nn.to_nifti()
all_subs_nn_nifti_Y = Brain_Data_allsubs_nn.Y
all_subs_nn_nifti_groups = Brain_Data_allsubs_nn.X.subject
all_subs_nn_nifti_groups

0       DEV001
1       DEV001
2       DEV001
3       DEV001
4       DEV001
         ...  
3685    DEV089
3686    DEV089
3687    DEV089
3688    DEV089
3689    DEV089
Name: subject, Length: 3690, dtype: object

In [8]:
Brain_Data_allsubs_grouped.Y = Brain_Data_allsubs_grouped.X.response.copy()
print(Brain_Data_allsubs_grouped.Y.value_counts())
all_subs_grouped_nifti = Brain_Data_allsubs_grouped.to_nifti()
all_subs_grouped_nifti_Y = Brain_Data_allsubs_grouped.Y
all_subs_grouped_nifti_groups = Brain_Data_allsubs_grouped.X.subject
all_subs_grouped_nifti_groups

6.0    236
7.0    235
5.0    227
8.0    202
Name: response, dtype: int64


0      DEV001
1      DEV001
2      DEV001
3      DEV001
4      DEV001
        ...  
895    DEV089
896    DEV089
897    DEV089
898    DEV089
899    DEV089
Name: subject, Length: 900, dtype: object

### Get subset

In [9]:
del Brain_Data_allsubs
del Brain_Data_allsubs_grouped
gc.collect()

15

In [10]:
from nilearn.decoding import DecoderRegressor
dRegressor = DecoderRegressor(estimator = 'ridge_regressor', standardize= True,scoring="r2")

In [11]:
import sys
for name, size in sorted(((name, sys.getsizeof(value)) for name, value in locals().items()),
                         key= lambda x: -x[1])[:10]:
    print(name + ': ' + str(size))

___: 232614
all_subs_nn_nifti_groups: 232614
_7: 232614
__: 56844
all_subs_grouped_nifti_groups: 56844
_8: 56844
all_subs_nn_nifti_Y: 29664
test_train_set: 9549
all_subs_grouped_nifti_Y: 7344
DecoderRegressor: 1472


In [12]:
asizeof_fmt(Brain_Data_allsubs_nn)

'3.3 GiB'

In [13]:
asizeof_fmt(all_subs_nn_nifti)

'12.4 GiB'

As a control, we'll try this again, this time just training and testing on individual values:

get a small sample of subjects to extract

In [14]:
sample_subject_items = np.unique(all_subs_nn_nifti_groups)[0:30]

In [15]:

sample_subject_vector = [i for i, x in enumerate(all_subs_nn_nifti_groups) if x in sample_subject_items]
sample_grouped_subject_vector = [i for i, x in enumerate(all_subs_grouped_nifti_groups) if x in sample_subject_items]

In [16]:
first_subs_nifti = nib.funcs.concat_images([all_subs_nn_nifti.slicer[...,s] for s in sample_subject_vector])
first_subs_nifti_Y = all_subs_nn_nifti_Y[sample_subject_vector]
first_subs_nifti = nil.image.clean_img(first_subs_nifti,detrend=False,standardize=True)
first_subs_nifti_groups = all_subs_nn_nifti_groups[sample_subject_vector]

del all_subs_nn_nifti
gc.collect()

116

In [17]:
first_subs_nifti.shape

(91, 109, 91, 1835)

In [18]:
first_subs_grouped_nifti = nib.funcs.concat_images([all_subs_grouped_nifti.slicer[...,s] for s in sample_grouped_subject_vector])
first_subs_grouped_nifti_Y = all_subs_grouped_nifti_Y[sample_grouped_subject_vector]
first_subs_grouped_nifti = nil.image.clean_img(first_subs_grouped_nifti,detrend=False,standardize=True)
first_subs_grouped_nifti_groups = all_subs_grouped_nifti_groups[sample_grouped_subject_vector]

del all_subs_grouped_nifti
gc.collect()

29

In [21]:
cv_outer = KFold(n_splits=3)

In [22]:
test_scores_same = cv_train_test_sets(
    trainset_X=first_subs_nifti,
    trainset_y=first_subs_nifti_Y,
    trainset_groups=first_subs_nifti_groups,
    cv=cv_outer
    
)

print(test_scores_same)

Groups are the same.
using default regressor. In order to test on a training group of 20 items, holding out the following subjects:['DEV023' 'DEV001' 'DEV022' 'DEV039' 'DEV024' 'DEV012' 'DEV006' 'DEV018'
 'DEV010' 'DEV026']. prepping fold data.... regressing.... 8.2 GiB. trying regressor 1 of 1. 

KeyboardInterrupt: 

In [None]:
test_scores_same

### Predict

Regressing in nilearn:
 - https://nilearn.github.io/decoding/estimator_choice.html
 - http://www.ncbi.nlm.nih.gov/pubmed/20691790







OK, so that's how you do it. It's pretty straightforward.

So...we won't look at nested cross-validation juuust yet, because the next step is to work out how to train on one set and predict on another. that will definitely require a custom pipeline. Let's get started...

and now extract them

In [20]:
print(test_scores_same)
print(np.mean(test_scores_same))

NameError: name 'test_scores_same' is not defined

In [None]:
test_scores_different = cv_train_test_sets(
    trainset_X=first_subs_grouped_nifti,
    trainset_y=first_subs_grouped_nifti_Y,
    trainset_groups=first_subs_grouped_nifti_groups,
    testset_X=first_subs_nifti,
    testset_y=first_subs_nifti_Y,
    testset_groups=first_subs_nifti_groups,
    cv = cv_outer)

In [None]:
print(test_scores_different)
print(np.mean(test_scores_different))

### Do a nested CV

In [23]:
test_scores_different = cv_train_test_sets(
    trainset_X=first_subs_grouped_nifti,
    trainset_y=first_subs_grouped_nifti_Y,
    trainset_groups=first_subs_grouped_nifti_groups,
    testset_X=first_subs_nifti,
    testset_y=first_subs_nifti_Y,
    testset_groups=first_subs_nifti_groups,
    param_grid = {'C':[0.04,0.2,1]},
    cv = cv_outer)
    #regressors=[svrreg,ridgereg])

using default regressor. In order to test on a training group of 20 items, holding out the following subjects:['DEV023' 'DEV001' 'DEV022' 'DEV039' 'DEV024' 'DEV012' 'DEV006' 'DEV018'
 'DEV010' 'DEV026']. prepping fold data.... regressing.... 2.0 GiB. trying regressor 1 of 1. predicting. test score was:. 0.09912683815577172
In order to test on a training group of 20 items, holding out the following subjects:['DEV009' 'DEV041' 'DEV019' 'DEV029' 'DEV034' 'DEV036' 'DEV042' 'DEV035'
 'DEV015' 'DEV005']. prepping fold data.... regressing.... 2.0 GiB. trying regressor 1 of 1. 



predicting. test score was:. 0.08429697476242537
In order to test on a training group of 20 items, holding out the following subjects:['DEV025' 'DEV014' 'DEV027' 'DEV016' 'DEV017' 'DEV021' 'DEV013' 'DEV040'
 'DEV028' 'DEV030']. prepping fold data.... regressing.... 2.0 GiB. trying regressor 1 of 1. 



predicting. test score was:. 0.09361122344248562


In [25]:
test_scores_different

([0.09912683815577172, 0.08429697476242537, 0.09361122344248562],
 [{'train_results': {"DecoderRegressor(cv=GroupKFold(n_splits=3),\n                 estimator=SVR(kernel='linear', max_iter=10000.0),\n                 memory=Memory(location=None),\n                 param_grid={'C': [0.04, 0.2, 1]})": {'hyper_score': 0.11913056691821511,
     'cv_scores_': {'beta': [-0.009791175820098363,
       0.14828916853289908,
       0.2188937080418446]},
     'cv_params_': {'beta': {'C': [1, 1, 1]}}}}},
  {'train_results': {"DecoderRegressor(cv=GroupKFold(n_splits=3),\n                 estimator=SVR(kernel='linear', max_iter=10000.0),\n                 memory=Memory(location=None),\n                 param_grid={'C': [0.04, 0.2, 1]})": {'hyper_score': 0.16620120040834505,
     'cv_scores_': {'beta': [0.2219274298853654,
       0.15230892345119262,
       0.12436724788847708]},
     'cv_params_': {'beta': {'C': [1, 1, 1]}}}}},
  {'train_results': {"DecoderRegressor(cv=GroupKFold(n_splits=3),\n     

In [26]:
print(np.mean(test_scores_different[0]))

0.09234501212022757


In [27]:
test_scores_same = cv_train_test_sets(
    trainset_X=first_subs_nifti,
    trainset_y=first_subs_nifti_Y,
    trainset_groups=first_subs_nifti_groups,
    param_grid = {'C':[0.04,0.2,1]},
    cv = cv_outer
)

print(test_scores_same)

Groups are the same.
using default regressor. In order to test on a training group of 20 items, holding out the following subjects:['DEV023' 'DEV001' 'DEV022' 'DEV039' 'DEV024' 'DEV012' 'DEV006' 'DEV018'
 'DEV010' 'DEV026']. prepping fold data.... regressing.... 8.2 GiB. trying regressor 1 of 1. predicting. test score was:. 0.17277547300186813
In order to test on a training group of 20 items, holding out the following subjects:['DEV009' 'DEV041' 'DEV019' 'DEV029' 'DEV034' 'DEV036' 'DEV042' 'DEV035'
 'DEV015' 'DEV005']. prepping fold data.... regressing.... 8.3 GiB. trying regressor 1 of 1. 



predicting. test score was:. 0.18869428982572367
In order to test on a training group of 20 items, holding out the following subjects:['DEV025' 'DEV014' 'DEV027' 'DEV016' 'DEV017' 'DEV021' 'DEV013' 'DEV040'
 'DEV028' 'DEV030']. prepping fold data.... regressing.... 8.3 GiB. trying regressor 1 of 1. 



predicting. test score was:. 0.17788790069591043
([0.17277547300186813, 0.18869428982572367, 0.17788790069591043], [{'train_results': {"DecoderRegressor(cv=GroupKFold(n_splits=3),\n                 estimator=SVR(kernel='linear', max_iter=10000.0),\n                 memory=Memory(location=None),\n                 param_grid={'C': [0.04, 0.2, 1]})": {'hyper_score': -0.009585209712001386, 'cv_scores_': {'beta': [-0.10343086462934692, 0.002017276983000693, 0.07265795851034207]}, 'cv_params_': {'beta': {'C': [1, 1, 1]}}}}}, {'train_results': {"DecoderRegressor(cv=GroupKFold(n_splits=3),\n                 estimator=SVR(kernel='linear', max_iter=10000.0),\n                 memory=Memory(location=None),\n                 param_grid={'C': [0.04, 0.2, 1]})": {'hyper_score': -0.05573643404727924, 'cv_scores_': {'beta': [0.08291227612920538, -0.14061208612632092, -0.10950949214472216]}, 'cv_params_': {'beta': {'C': [1, 1, 1]}}}}}, {'train_results': {"DecoderRegressor(cv=GroupKFold(n_splits=3),\n  

In [28]:
print(np.mean(test_scores_same[0]))

0.17978588784116742


In [29]:
test_scores_same[1]

[{'train_results': {"DecoderRegressor(cv=GroupKFold(n_splits=3),\n                 estimator=SVR(kernel='linear', max_iter=10000.0),\n                 memory=Memory(location=None),\n                 param_grid={'C': [0.04, 0.2, 1]})": {'hyper_score': -0.009585209712001386,
    'cv_scores_': {'beta': [-0.10343086462934692,
      0.002017276983000693,
      0.07265795851034207]},
    'cv_params_': {'beta': {'C': [1, 1, 1]}}}}},
 {'train_results': {"DecoderRegressor(cv=GroupKFold(n_splits=3),\n                 estimator=SVR(kernel='linear', max_iter=10000.0),\n                 memory=Memory(location=None),\n                 param_grid={'C': [0.04, 0.2, 1]})": {'hyper_score': -0.05573643404727924,
    'cv_scores_': {'beta': [0.08291227612920538,
      -0.14061208612632092,
      -0.10950949214472216]},
    'cv_params_': {'beta': {'C': [1, 1, 1]}}}}},
 {'train_results': {"DecoderRegressor(cv=GroupKFold(n_splits=3),\n                 estimator=SVR(kernel='linear', max_iter=10000.0),\n       

### Nested CV, trying out different decoders

In [61]:
cv_outer = KFold(n_splits=3)
cv_inner =  GroupKFold(3)

svr_reg_set = []
ridge_reg_set = []
for sp in [1,5,20]:
    svr_reg = DecoderRegressor(
    estimator = 'svr',
    param_grid = {
            'C':[0.04,0.2,1],
            'epsilon':[0.01,0.1,1]
        },
    screening_percentile = sp,
    standardize= True,cv=cv_inner, scoring="r2"
    )
    svr_reg_set = svr_reg_set + [svr_reg]
    

    ridgereg = DecoderRegressor(
        estimator = 'ridge',
        screening_percentile = sp,
        standardize= True,cv=cv_inner, scoring="r2"
    )
    ridge_reg_set = ridge_reg_set + [ridgereg]

regressors = ridge_reg_set + svr_reg_set


In [62]:
test_scores_different = cv_train_test_sets(
    trainset_X=first_subs_grouped_nifti,
    trainset_y=first_subs_grouped_nifti_Y,
    trainset_groups=first_subs_grouped_nifti_groups,
    testset_X=first_subs_nifti,
    testset_y=first_subs_nifti_Y,
    testset_groups=first_subs_nifti_groups,
    regressors = regressors,
    cv=cv_outer)

In order to test on a training group of 20 items, holding out the following subjects:['DEV023' 'DEV001' 'DEV022' 'DEV039' 'DEV024' 'DEV012' 'DEV006' 'DEV018'
 'DEV010' 'DEV026']. prepping fold data.... regressing.... 2.0 GiB. trying regressor 1 of 6. predicting. trying regressor 2 of 6. predicting. trying regressor 3 of 6. predicting. trying regressor 4 of 6. predicting. trying regressor 5 of 6. predicting. trying regressor 6 of 6. predicting. test score was:. 0.044857485810989806
In order to test on a training group of 20 items, holding out the following subjects:['DEV009' 'DEV041' 'DEV019' 'DEV029' 'DEV034' 'DEV036' 'DEV042' 'DEV035'
 'DEV015' 'DEV005']. prepping fold data.... regressing.... 2.0 GiB. trying regressor 1 of 6. 



predicting. trying regressor 2 of 6. 



predicting. trying regressor 3 of 6. 



predicting. trying regressor 4 of 6. 



predicting. trying regressor 5 of 6. 



predicting. trying regressor 6 of 6. 



predicting. test score was:. 0.02556375773762265
In order to test on a training group of 20 items, holding out the following subjects:['DEV025' 'DEV014' 'DEV027' 'DEV016' 'DEV017' 'DEV021' 'DEV013' 'DEV040'
 'DEV028' 'DEV030']. prepping fold data.... regressing.... 2.0 GiB. trying regressor 1 of 6. 



predicting. trying regressor 2 of 6. 



predicting. trying regressor 3 of 6. 



predicting. trying regressor 4 of 6. 



predicting. trying regressor 5 of 6. 



predicting. trying regressor 6 of 6. 



predicting. test score was:. 0.15354912298044054


So which is better out of the three screening_percentile values tested?

In [82]:
list(test_scores_different[1][0]['train_results']['DecoderRegressor(cv=GroupKFold(n_splits=3),\n                 estimator=RidgeCV(alphas=array([ 0.1,  1. , 10. ])),\n                 memory=Memory(location=None), screening_percentile=1)'
                                            ]['cv_scores_'].keys())==['beta']

True

In [98]:
def describe_scores(scores_return):
    for regressor_desc in scores_return[2]['train_results'].keys():
        print("")
        print(regressor_desc)

        hyper_scores = [scores_return[f]['train_results'][regressor_desc]['hyper_score'] for f in range(len(scores_return))]
        print(np.mean(hyper_scores))
        print(hyper_scores)
    #     for fold in range(len(test_scores_different[1])):
    #         print(test_scores_different[1][fold]['train_results'][regressor_desc])

describe_scores(test_scores_different[1])


DecoderRegressor(cv=GroupKFold(n_splits=3),
                 estimator=RidgeCV(alphas=array([ 0.1,  1. , 10. ])),
                 memory=Memory(location=None), screening_percentile=1)
-0.4507643239578673
[-0.2326966199628411, -0.6638188202644012, -0.45577753164635965]

DecoderRegressor(cv=GroupKFold(n_splits=3),
                 estimator=RidgeCV(alphas=array([ 0.1,  1. , 10. ])),
                 memory=Memory(location=None), screening_percentile=5)
0.14175793478535756
[0.1649470845157883, 0.03166753402629748, 0.22865918581398692]

DecoderRegressor(cv=GroupKFold(n_splits=3),
                 estimator=RidgeCV(alphas=array([ 0.1,  1. , 10. ])),
                 memory=Memory(location=None))
0.17342210052179943
[0.0924656393687809, 0.16576041105501924, 0.2620402511415981]

DecoderRegressor(cv=GroupKFold(n_splits=3),
                 estimator=SVR(kernel='linear', max_iter=10000.0),
                 memory=Memory(location=None),
                 param_grid={'C': [0.04, 0.2, 1], 'epsilo

In [127]:
test_scores_same = cv_train_test_sets(
    trainset_X=first_subs_nifti,
    trainset_y=first_subs_nifti_Y,
    trainset_groups=first_subs_nifti_groups,
    regressors = regressors,
    cv=cv_outer)

Groups are the same.
In order to test on a training group of 20 items, holding out the following subjects:['DEV023' 'DEV001' 'DEV022' 'DEV039' 'DEV024' 'DEV012' 'DEV006' 'DEV018'
 'DEV010' 'DEV026']. prepping fold data.... regressing.... 8.2 GiB. trying regressor 1 of 6. predicting. trying regressor 2 of 6. 



predicting. trying regressor 3 of 6. 



predicting. trying regressor 4 of 6. 



predicting. trying regressor 5 of 6. 



predicting. trying regressor 6 of 6. 



predicting. test score was:. 0.18229833387917527
In order to test on a training group of 20 items, holding out the following subjects:['DEV009' 'DEV041' 'DEV019' 'DEV029' 'DEV034' 'DEV036' 'DEV042' 'DEV035'
 'DEV015' 'DEV005']. prepping fold data.... regressing.... 8.3 GiB. trying regressor 1 of 6. 



predicting. trying regressor 2 of 6. 



predicting. trying regressor 3 of 6. 



predicting. trying regressor 4 of 6. 



predicting. trying regressor 5 of 6. 



predicting. trying regressor 6 of 6. 



predicting. test score was:. 0.20963485216593358
In order to test on a training group of 20 items, holding out the following subjects:['DEV025' 'DEV014' 'DEV027' 'DEV016' 'DEV017' 'DEV021' 'DEV013' 'DEV040'
 'DEV028' 'DEV030']. prepping fold data.... regressing.... 8.3 GiB. trying regressor 1 of 6. 



predicting. trying regressor 2 of 6. 



predicting. trying regressor 3 of 6. 



predicting. trying regressor 4 of 6. 



predicting. trying regressor 5 of 6. 



predicting. trying regressor 6 of 6. 



predicting. test score was:. 0.18003803593181888


In [131]:
print(test_scores_same[0])
print(test_scores_different[0])

[0.18229833387917527, 0.20963485216593358, 0.18003803593181888]
[0.044857485810989806, 0.02556375773762265, 0.15354912298044054]


In [132]:
describe_scores(test_scores_same[1])


DecoderRegressor(cv=GroupKFold(n_splits=3),
                 estimator=RidgeCV(alphas=array([ 0.1,  1. , 10. ])),
                 memory=Memory(location=None), screening_percentile=1)
-0.5062852263118208
[-0.5001891880290674, -0.6770925151423778, -0.3415739757640171]

DecoderRegressor(cv=GroupKFold(n_splits=3),
                 estimator=RidgeCV(alphas=array([ 0.1,  1. , 10. ])),
                 memory=Memory(location=None), screening_percentile=5)
-0.47918944147709003
[-0.5047063048756234, -0.5841824442317359, -0.34867957532391075]

DecoderRegressor(cv=GroupKFold(n_splits=3),
                 estimator=RidgeCV(alphas=array([ 0.1,  1. , 10. ])),
                 memory=Memory(location=None))
-0.04175874490088224
[-0.0555266593787808, -0.12145363647163498, 0.05170406114776908]

DecoderRegressor(cv=GroupKFold(n_splits=3),
                 estimator=SVR(kernel='linear', max_iter=10000.0),
                 memory=Memory(location=None),
                 param_grid={'C': [0.04, 0.2, 1], '

#### Nested cross-validation

See for instance: http://nilearn.github.io/auto_examples/02_decoding/plot_haxby_grid_search.html
        
See also https://machinelearningmastery.com/nested-cross-validation-for-machine-learning-with-python/

and this thread: https://neurostars.org/t/nested-cross-validation/19459

The Haxby code:

(1) Loops through each outer loop
(2) And for each outer loop, loops again through each parameter
(3) And then estimates three times on the _training_ data for that
(4) Then gets a prediction score for that hyperparameter configuration

This is fundamentally the same as the nested CV in Machine Learning Mastery, with one exception. The Haxby code calculates a prediction on the test set once per hyperparameter. The MLM code calculates a prediction on the test set once per outer split. There is more stuff going on in the GridSearchCV but you don't see that.

The Decoder object actually as a param_grid command so this could be useful for doing the parameter search properly, in the inner loop.


Issues here:

 - Need to get the cross-validation right. We have got it working but there is an inner validation that selects a decoder for each group. Does that make sense? I think so, but just need to consider it a bit carefully.
 - What are the individual methods that inner validation is running? Should keep track of that
 - We really need to compare this to a baseline. So we need to run an individual-level analysis


In [133]:
print(test_scores_same[0])
print(test_scores_different[0])

[0.18229833387917527, 0.20963485216593358, 0.18003803593181888]
[0.044857485810989806, 0.02556375773762265, 0.15354912298044054]


In [134]:
print(np.mean(test_scores_same[0]))
print(np.mean(test_scores_different[0]))

0.19065707399230925
0.07465678884301767


Looks like:
 - SVR is better than Ridge
 - Screening percentile over 5 doesn't seem to increase performance

## Try out different values of alpha for ridge regressor

In [141]:
cv_outer = KFold(n_splits=3)
cv_inner =  GroupKFold(3)

ridgereg = DecoderRegressor(
        estimator = 'ridge_regressor',
        screening_percentile = 20,
        param_grid = {'alphas':[0.2,0.9]},
        standardize= True,cv=cv_inner, scoring="r2"
    )


test_scores_different_ridge = cv_train_test_sets(
    trainset_X=first_subs_grouped_nifti,
    trainset_y=first_subs_grouped_nifti_Y,
    trainset_groups=first_subs_grouped_nifti_groups,
    testset_X=first_subs_nifti,
    testset_y=first_subs_nifti_Y,
    testset_groups=first_subs_nifti_groups,
    regressors = [ridgereg],
    cv=cv_outer)

In order to test on a training group of 20 items, holding out the following subjects:['DEV023' 'DEV001' 'DEV022' 'DEV039' 'DEV024' 'DEV012' 'DEV006' 'DEV018'
 'DEV010' 'DEV026']. prepping fold data.... regressing.... 2.0 GiB. trying regressor 1 of 1. 

RuntimeError: Cannot clone object RidgeCV(alphas=0.2), as the constructor either does not set or modifies parameter alphas