In [27]:
from nilearn import datasets
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from nilearn.input_data import NiftiMasker
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import multiprocessing
from multiprocessing import Pool
from functools import partial
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options
from nilearn import datasets #only needed if testing

## Cache
Here we can outline the cache options we would like to save and then initallize it before the main() is called.

In [28]:
cache_opts = {
    'cache.type': 'file',
    'cache.data_dir': '/Users/gracer/Google Drive/decode/tmp/data',
    'cache.lock_dir': '/Users/gracer/Google Drive/decode/tmp/lock',
    'cache.regions' : 'short_term'
}

cache = CacheManager(**parse_cache_config_options(cache_opts))

tmpl_cache = cache.get_cache('/Users/gracer/Google Drive/decode/tmp/mytemplate.html', type='dbm', expire=3600)

## Main function
This function takes in the data files and is the only one that needs to be explictly called

In [29]:
@cache.cache('short_term')
def main(DATA, LABELS, MASK, FUNC):
    y = LABELS['labels']
    session = LABELS['chunks']
    condition_mask = y.isin(['shoe', 'bottle'])
    y = y[condition_mask]
    mask_filename = MASK
    
    svc = SVC(kernel='linear')

    k_range = [[10, 15, 30, 50, 150], [300, 500, 1000, 1500, 3000, 5000]]

    # Define the dimension reduction to be used.
    # Here we use a classical univariate feature selection based on F-test,
    # namely Anova. We set the number of features to be selected to 500
    feature_selection = SelectKBest(f_classif, k=500)

    # We have our classifier (SVC), our feature selection (SelectKBest), and now,
    # we can plug them together in a *pipeline* that performs the two operations
    # successively:
    anova_svc = Pipeline([('anova', feature_selection), ('svc', svc)])
    
    
    
    X=masker(MASK, FUNC, condition_mask, session)
    
    parallel_runs(anova_svc, X, y, k_range)

## Masker
This function takes in the mask image, conditions, session if necessary. It will return the X value that the CV_Scores will need

In [30]:
def masker(MASK, FUNC, condition_mask, session):
    # For decoding, standardizing is often very important
    nifti_masker = NiftiMasker(mask_img=MASK, sessions=session,
                               smoothing_fwhm=4, standardize=True,
                               memory="nilearn_cache", memory_level=1)
    func_filename = FUNC
    X = nifti_masker.fit_transform(func_filename)
    # Restrict to non rest data
    X = X[condition_mask]
    session = session[condition_mask]
    return(X)

## CV_Scores
This will take in the k range we want to calculate, the model that was build in main, the X from masker, and y from main. This is never explicitly called, it is called within the partial function and in the parallel function.

In [31]:
def CV_Scores(k_range, model,X, y):
    grid = GridSearchCV(model, param_grid={'anova__k': k_range}, verbose=1,
                    cv=3)
    nested_cv_scores = cross_val_score(grid, X, y, cv=3)

    print("Nested CV score: %.4f" % np.mean(nested_cv_scores))


## Parallel
This function is used to speed up the CV_Scoring process. It must be called in the main function. It expects the model, X, y and the range of K values 

In [32]:
def parallel_runs(model, X, y, k_range):
    pool = multiprocessing.Pool(multiprocessing.cpu_count())
    prod_x=partial(CV_Scores, model=model, X=X, y=y) 
    result_list = pool.map(prod_x, k_range) 
    print(result_list)

## Globals
You will need to read in the dataset (images), the mask, and the behavioral data

In [33]:
data= datasets.fetch_haxby()
func = data.func[0]
data0 = pd.read_csv(data.session_target[0], sep=" ")

In [34]:
main(data , data0, data.mask, func)

Fitting 3 folds for each of 5 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 6 candidates, totalling 18 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:    1.8s finished


Fitting 3 folds for each of 5 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    2.9s finished


Fitting 3 folds for each of 6 candidates, totalling 18 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:    1.7s finished


Fitting 3 folds for each of 5 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:    2.0s finished


Nested CV score: 0.5093


[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    3.1s finished


Fitting 3 folds for each of 6 candidates, totalling 18 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    2.3s finished


Nested CV score: 0.5972
[None, None]
