In [307]:
import os
import time
import math
import random
import itertools
import scipy.io
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import ParameterGrid
from IPython.display import clear_output

In [217]:
def df_to_arr(df):
    
    vals = []
    for column in df:
        vals.extend(df[column].tolist())
    return np.array([x for x in vals if str(x) != 'nan'])

# SVM Training Functions

<a href="https://www.csie.ntu.edu.tw/~cjlin/papers/guide/guide.pdf">Guide to SVM Training</a>
### To-do:
<ul>
    <li> Check ranking system and data standardization </li>
    <li> Make data generation process more efficient/less redundant </li>
</ul>

In [2]:
def get_subjects(path):
    
    '''
    Gets a list of subject IDs and the file suffix, given a path to the data files. 
    
    Note: subject ID must be only 2 characters for this to work, and all data files
    must have same suffix.
    
    Parameters
    ----------
    path: str
        directory to the data files
        
    Returns
    -------
    list
        a list of subject IDs
    str
        the suffix to the filenames
    '''
    
    files = os.listdir(path)
    subjects = [f[:2] for f in files]
    suffix = files[0][2:]
        
    subjects.sort()
    
    return subjects, suffix

In [226]:
def scramble_labels(y_data, classes):
    
    '''
    Randomly selects half of the labels in the data to switch to the other class.
    
    Parameters
    ----------
    y_data: array-like
        label data to scramble
    classes: list
        the two different classes of labels
    '''
    
    y_data_copy = y_data.copy()
    for index in np.nditer(np.random.choice(len(y_data), size=len(y_data)//2, replace=False)):
        
        if y_data[index] == classes[0]:
            y_data[index] = classes[1]
        else:
            y_data[index] = classes[0]
    
    # Makes sure labels are scrambled properly
    num_diff = sum(i != j for i, j in zip(y_data, y_data_copy))  
    if num_diff != len(y_data)//2:
        raise ValueError
    

In [410]:
def extract_subject_data(path, subject, suffix, roi, conds, block_length, rank_first, shuffle):
    
    '''
    Extracts individual subject data from the .mat files.
    
    Parameters
    ----------
    path: str
        directory to data files
    subject: str
        ID of subject to load data for
    suffix: str
        ending suffix of the data filename
    roi: int
        0 for V1 data, 1 for MT data
    conds: list
        list of integers specifying the conditional datasets to extract
        (0 for trained_cp, 1 for trained_ip, 2 for untrained_cp, 3 for untrained_ip)
    block_length: int
        the number of voxels to standardize every block in the dataset to
    rank_first: boolean
        whether to use first block in subject to order the rest of the blocks for that subject
    shuffle: boolean
        whether to randomize which block to use in rank-ordering
        
    Returns
    -------
    List of voxel data (x_data) separated by individual blocks and the corresponding labels (y_data)
    '''
    
    x_data = []
    y_data = []
    
    path_to_file = path + subject + suffix
    mat = scipy.io.loadmat(path_to_file)['roi_scanData'][0][roi]
    
    ranked_indices = None
    
    for scan in range(len(mat[0])):
            
        for cond in conds:
            
            blocks = [x for x in range(len(mat[0][scan][0][cond][0]))]
            if rank_first and shuffle:
                random.shuffle(blocks)
            for block in blocks:
                block_data = []
                for tr in range(len(mat[0][scan][0][cond][0][block][0])):
                    
                    # Extract all voxel data from individual TRs
                    block_data.extend(mat[0][scan][0][cond][0][block][0][tr][0][0][0].tolist())
                    
                if rank_first:
                    if ranked_indices is None:
                        ranked_indices = [i for i in (np.array(block_data)).argsort()[-block_length:]]
                        ranked_indices = np.flip(ranked_indices)
                    block_data = [block_data[i] if i < len(block_data) else 0 for i in ranked_indices]
                else:
                    # Filters for most active voxels in each block
                    block_data.sort()
                    block_data = block_data[-block_length:]
                
                x_data.append(block_data)
                y_data.append(mat[0][scan][1][cond][0])
    
    data = {'x': x_data, 'y': y_data}
    return data

In [385]:
path = r'scans/output/PRE/'
roi = 1                            # V1-roi: 0, MT-roi: 1
conds = [1, 3]                     # trained_cp: 0, trained_ip: 1, untrained_cp: 2, untrained_ip: 3
block_length = 624

subjects, suffix = get_subjects(path)
columns = [x for x in range(624)]
blocks = pd.DataFrame(columns = columns)

x_train, _, x_inner, _, x_outer, _ = generate_data(subjects, [subjects[0]], subjects[1], path, suffix, roi, conds, block_length, True, False)
for b in x_inner:
    blocks.loc[len(blocks)] = b
for b in x_outer:
    blocks.loc[len(blocks)] = b    
for b in x_train:
    blocks.loc[len(blocks)] = b
blocks

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,614,615,616,617,618,619,620,621,622,623
0,0.073359,0.052941,0.050193,0.050193,0.050193,0.040964,0.040619,0.038806,0.037624,0.036750,...,0.000000,0.000000,0.000000,0.000000,-0.001038,-0.001081,-0.001081,-0.001081,-0.001088,-0.001100
1,-0.003774,0.002933,0.033962,0.033962,0.026415,0.011820,0.003846,0.016153,0.003953,0.026923,...,0.006061,-0.006276,0.010169,-0.005181,0.004158,0.000000,0.003202,-0.004310,-0.003250,0.008734
2,0.044776,0.031700,-0.007463,0.037313,0.029851,0.004739,0.034221,0.010309,0.033268,0.011407,...,0.005056,-0.004193,0.000000,-0.005160,0.001032,-0.003222,0.005405,0.001074,-0.003250,0.001103
3,0.022556,0.000000,0.030075,0.030075,0.022556,0.018868,0.011494,0.023599,0.005894,0.022989,...,0.010204,-0.002110,0.005714,-0.007254,0.015641,-0.005405,0.011866,-0.005405,0.004376,0.012101
4,0.000000,-0.003040,0.045802,0.022901,0.038168,0.033019,-0.004049,0.012517,0.002012,0.016194,...,0.000000,-0.012397,0.001125,-0.004016,0.006122,-0.002105,0.000000,0.000000,0.003300,0.002183
5,-0.029630,-0.041420,0.014815,-0.014815,0.000000,0.006928,-0.026052,0.009655,-0.037328,0.014028,...,-0.003899,-0.001030,0.006787,-0.005005,-0.005045,0.001054,-0.009585,0.007376,-0.004396,-0.001091
6,0.011407,0.030488,0.041825,0.034221,-0.011407,0.033019,0.048980,0.028011,0.036290,0.028571,...,0.000979,-0.016393,-0.001130,0.012121,0.000000,-0.001056,0.004283,0.007392,0.007701,-0.001098
7,-0.003745,-0.018072,-0.026217,0.003745,0.026217,-0.034483,-0.010060,-0.002770,-0.005941,0.006036,...,-0.001957,-0.016461,0.011287,-0.010000,-0.014228,-0.006303,0.016043,-0.006303,-0.010965,0.009858
8,-0.014493,-0.023810,-0.007246,0.028986,-0.043478,0.013825,0.008097,0.008357,0.001972,0.008097,...,-0.001953,-0.006289,0.011468,-0.001007,0.001021,0.001072,0.000000,-0.001072,0.005599,0.005513
9,-0.018182,-0.014749,0.010909,0.018182,-0.025455,0.015945,-0.014028,0.012552,-0.017544,0.010020,...,-0.018573,-0.013584,-0.005714,-0.006036,-0.017189,-0.008547,-0.009646,-0.006410,-0.019694,-0.017621


In [411]:
'''
Generates training and testing data.
'''
def generate_data(subjects, inner_test_subjects, outer_test_subject, path, suffix, roi, conds, block_length, rank_first, shuffle):
    
    '''
    Generates training and testing data, which is separated into training data, inside testing data,
    and outside testind data.
    
    Parameters
    ----------
    subjects: list
        a list of subject IDs to extract data from
    inner_test_subject: list
        list of subject IDs of the inner test subjects
    outer_test_subject: str
        the ID of the outer test subject
    path: str
        the path to the data files
    suffix: str
        ending suffix of the data filename
    roi: int
        0 for V1 data, 1 for MT data
    conds: list
        list of integers specifying the conditional datasets to extract
        (0 for trained_cp, 1 for trained_ip, 2 for untrained_cp, 3 for untrained_ip)    
    block_length: int
        the number of voxels to standardize every block in the dataset to
    rank_first: boolean
        whether to use first block in subject to order the rest of the blocks for that subject
    shuffle: boolean
        whether to randomize which block to use in rank-ordering
    
    Returns
    -------
    list
        blocks of voxel data for training use
    list
        training labels
    list
        inner test subject blocks of voxel data for testing use
    list 
        testing labels for inner test subject
    list
        outer test subject blocks of voxel data for testing use
    list
        testing labels for outer test subject
    '''
    
    data = []
    
    x_train = []
    y_train = []
    
    x_test_inner = []
    y_test_inner = []
    
    x_test_outer = []
    y_test_outer = []
    
    for subject in subjects:
        
        subject_data = extract_subject_data(path, subject, suffix, roi, conds, block_length, rank_first, shuffle)
        if subject == outer_test_subject:
            x_test_outer.extend(subject_data['x'])
            y_test_outer.extend(subject_data['y'])
        elif subject in inner_test_subjects:
            x_test_inner.extend(subject_data['x'])
            y_test_inner.extend(subject_data['y'])
        else:
            x_train.extend(subject_data['x'])
            y_train.extend(subject_data['y'])
    
    x_train_len = len(x_train)
    x_test_outer_len = len(x_test_outer)
    
    data.extend(x_train)
    data.extend(x_test_outer)
    data.extend(x_test_inner)
    
    # MinMaxScaler scales each feature to values between 0 and 1 among all x data
    scaler = MinMaxScaler(feature_range=(0, 1))
    x_standardized = scaler.fit_transform(data)
    x_train, x_test_outer, x_test_inner = x_standardized[:x_train_len], x_standardized[x_train_len:x_train_len+x_test_outer_len], x_standardized[x_train_len+x_test_outer_len:]

    y_train = np.stack(y_train, axis=0)
    
    return x_train, y_train, x_test_inner, y_test_inner, x_test_outer, y_test_outer

In [412]:
def get_optimal_run(x_train, y_train, x_test, y_test, kernels, gamma_range, C_range):
    
    '''
    Gets best hyperparameters (kernel, C, and gamma values) that optimize SVM's predictions for given
    x and y test dataset.
    
    Parameters
    ----------
    x_train: array-like
        dataset of block data used to train classifier
    y_train: array-like
        dataset of label data used to train classifier
    x_test: array-like
        testing dataset of block data used to optimize hyperparameters on
    y_test: array-like
        testing dataset of label data used to optimize hyperparameters on
    kernels: list
        kernels to test (recommended options are 'linear', 'rbf', and 'sigmoid')
    gamma_range: dict
        dict that specifies the range of values of gamma to test; should include start, stop to range,
        num of values, and the exponential base
    C_range: dict
        dict that specifies the range of values of C to test; should include start, stop to range,
        num of values, and the exponential base
        
    Returns
    -------
    dict
        best combination of parameters found from grid search
    float
        best accuracy obtained from testing
    '''
    
    gamma_vals = np.logspace(gamma_range['start'], gamma_range['stop'], gamma_range['num'], base=gamma_range['base'])
    C_vals = np.logspace(C_range['start'], C_range['stop'], C_range['num'], base=C_range['base'])

    param_grid = ParameterGrid({'kernel': kernels, 'gamma': gamma_vals, 'C': C_vals})
    
    best_acc = 0
    best_params = None
    
    # Tests each parameter combination to find best one for given testing data
    for params in list(param_grid):
        
        svclassifier = SVC(kernel=params['kernel'], gamma=params['gamma'], C=params['C'], max_iter=-1)
        svclassifier.fit(x_train, y_train)
        
        curr_acc = svclassifier.score(x_test, y_test)
        
        if curr_acc > best_acc:
            best_acc = curr_acc
            best_params = params
            
    return best_params, best_acc

In [413]:
def train(data_params, grid_params, num_inner=1, scramble=False, classes=None, rank_first=True, shuffle=False):
    
    '''
    Trains and tests the classifier for accuracy.
    
    Parameters
    ----------
    data_params: dict
        path: str
            the path to the data files
        roi: int
            0 for V1 data, 1 for MT data
        conds: list
            list of integers specifying the conditional datasets to extract
            (0 for trained_cp, 1 for trained_ip, 2 for untrained_cp, 3 for untrained_ip)    
        block_length: int
            the number of voxels to standardize every block in the dataset to
    grid_params: dict
        kernels: list
            kernels to test (recommended options are 'linear', 'rbf', and 'sigmoid')
        gamma: dict
            dict that specifies the range of values of gamma to test; should include start, stop to range,
            num of values, and the exponential base
        C: dict
            dict that specifies the range of values of C to test; should include start, stop to range,
            num of values, and the exponential base
    num_inner: int
        number of inner subjects to test classifier on,
        default is 1
    scramble: boolean, optional
        whether or not to scramble the labels when training, 
        default is False
    classes: list, optional if scramble is False but required if scramble is True
        label classes for the data (should be length of 2)
    rank_first: boolean
        whether to use first block in subject to order the rest of the blocks for that subject,
        default is True
    shuffle: boolean
        whether to randomize which block to use in rank-ordering, 
        default is False
        
    Returns
    -------
    DataFrame
        data of inner subject combination testing accuracy
    DataFrame
        data of outer subject testing accuracy
    '''
    
    subjects, suffix = get_subjects(data_params['path'])
    
    cols = []
    for combo in itertools.combinations(range(len(subjects)), num_inner):
        col = ''
        for subject in combo:
            col += '/' + subjects[subject]
        cols.append(col[1:])

    inner_acc_report = pd.DataFrame(index=subjects, columns=cols)
    outer_acc_report = pd.DataFrame(index=subjects, columns=cols)
    
    for outer_subject in subjects:
        
        print("Currently on outer subject #%i." % (subjects.index(outer_subject)+1))

        start_time = time.time()
        
        inner_subjects = [s for s in subjects if s != outer_subject]
        for inner_subject_test in itertools.combinations((inner_subjects), num_inner):
            
            inner_subject_test = list(inner_subject_test)

            col = ''
            for subject in inner_subject_test:
                col += '/' + subject
            col = col[1:]
            # print("Currently on combination of %s." % (col))    
            
            x_train, y_train, x_test_inner, y_test_inner, x_test_outer, y_test_outer = generate_data(subjects, inner_subject_test, outer_subject, data_params['path'], suffix, data_params['roi'], data_params['conds'], data_params['block_length'], rank_first, shuffle)
            if scramble:
                scramble_labels(y_train, classes)
                
            # gets optimal params for training dataset from grid search
            opt_params, inner_acc = get_optimal_run(x_train, y_train, x_test_inner, y_test_inner, grid_params['kernels'], grid_params['gamma'], grid_params['C']) 

            # train model using optimal params for this set
            svclassifier = SVC(kernel=opt_params['kernel'], gamma=opt_params['gamma'], C=opt_params['C'], max_iter=-1)
            svclassifier.fit(x_train, y_train)
            
            outer_acc = svclassifier.score(x_test_outer, y_test_outer)
            
            # logs inner and outer subject accuracy data in dataframe
            inner_acc_report.at[outer_subject, col] = inner_acc
            outer_acc_report.at[outer_subject, col] = outer_acc

        # clear_output()
        
        end_time = time.time()
        exec_time = end_time - start_time
        minutes = exec_time // 60
        seconds = exec_time % 60
        print('Last turn took %i minutes and %f seconds.' % (minutes, seconds))
    
    clear_output()
    return inner_acc_report, outer_acc_report

# Training Classifier/Visualizing Accuracies

## Verifying Rank-Order Robustness

True accuracy of rank-order appears to lie around 0.533. <br>
For reference, using first block in rank-order produced accuracy of 0.540.

In [334]:
# Shuffle which block is used to rank other blocks within subject

path = r'scans/output/PRE/'
roi = 1                            # V1-roi: 0, MT-roi: 1
conds = [1, 3]                     # trained_cp: 0, trained_ip: 1, untrained_cp: 2, untrained_ip: 3
block_length = 624

gamma_range = {'start': -13, 'stop': 1, 'num': 16, 'base': 2.0}
C_range = {'start': -3, 'stop': 11, 'num': 16, 'base': 2.0}
kernels = ['rbf', 'sigmoid']

inner_samples = []
outer_samples = []
for runs in range(10):
    print(f'On run {runs+1}.')
    
    inner_accs, outer_accs = train(path, roi, conds, block_length, kernels, gamma_range, C_range, num_inner=1, rank_first=True)
    inner_samples.append(df_to_arr(inner_accs))
    outer_samples.append(df_to_arr(outer_accs))
    
#inner_accs.to_csv('output/rank/inner_accs16_bshuff.csv')
#outer_accs.to_csv('output/rank/outer_accs16_bshuff.csv')

## Folds Testing

In [93]:
path = r'scans/output/PRE/'
roi = 1                            # V1-roi: 0, MT-roi: 1
conds = [1, 3]                     # trained_cp: 0, trained_ip: 1, untrained_cp: 2, untrained_ip: 3
block_length = 624

gamma_range = {'start': -13, 'stop': 1, 'num': 32, 'base': 2.0}
C_range = {'start': -3, 'stop': 11, 'num': 32, 'base': 2.0}
kernels = ['rbf', 'sigmoid']

inner_accs, outer_accs = train(path, roi, conds, block_length, kernels, gamma_range, C_range, num_inner=2, rank_first=True)

inner_accs.to_csv('output/rank/inner_accs32_2inner.csv')
outer_accs.to_csv('output/rank/outer_accs32_2inner.csv')

## Miscellaneous Parameters

In [59]:
# Original GS64 Run

path = r'scans/output/PRE/'
roi = 1                            # V1-roi: 0, MT-roi: 1
conds = [1, 3]                     # trained_cp: 0, trained_ip: 1, untrained_cp: 2, untrained_ip: 3
block_length = 624

gamma_range = {'start': -13, 'stop': 1, 'num': 64, 'base': 2.0}
C_range = {'start': -3, 'stop': 11, 'num': 64, 'base': 2.0}
kernels = ['rbf', 'sigmoid']

inner_accs, outer_accs = train(path, roi, conds, block_length, kernels, gamma_range, C_range)

inner_accs.to_csv('output/inner_accs64.csv', sep='\t')
outer_accs.to_csv('output/outer_accs64.csv', sep='\t')

In [14]:
# Expanded GS32 Run

path = r'scans/output/PRE/'
roi = 1                            # V1-roi: 0, MT-roi: 1
conds = [1, 3]                     # trained_cp: 0, trained_ip: 1, untrained_cp: 2, untrained_ip: 3
block_length = 624

gamma_range = {'start': -15, 'stop': 5, 'num': 32, 'base': 2.0}
C_range = {'start': -5, 'stop': 15, 'num': 32, 'base': 2.0}
kernels = ['rbf', 'sigmoid']

inner_accs, outer_accs = train(path, roi, conds, block_length, kernels, gamma_range, C_range)

inner_accs.to_csv('output/inner_accs32_more.csv', sep='\t')
outer_accs.to_csv('output/outer_accs32_more.csv', sep='\t')

In [206]:
# Rechecking validity of ranking system

path = r'scans/output/PRE/'
roi = 1                            # V1-roi: 0, MT-roi: 1
conds = [1, 3]                     # trained_cp: 0, trained_ip: 1, untrained_cp: 2, untrained_ip: 3
block_length = 624

gamma_range = {'start': -13, 'stop': 1, 'num': 16, 'base': 2.0}
C_range = {'start': -3, 'stop': 11, 'num': 16, 'base': 2.0}
kernels = ['rbf', 'sigmoid']

inner_accs, outer_accs = train(path, roi, conds, block_length, kernels, gamma_range, C_range)

inner_accs.to_csv('output/inner_accs16_testrank.csv', sep='\t')
outer_accs.to_csv('output/outer_accs16_testrank.csv', sep='\t')

In [422]:
path = r'scans/output/PRE/'
roi = 1                            # V1-roi: 0, MT-roi: 1
conds = [1, 3]                     # trained_cp: 0, trained_ip: 1, untrained_cp: 2, untrained_ip: 3
block_length = 624

gamma_range = {'start': -11, 'stop': 3, 'num': 15, 'base': 2.0}
C_range = {'start': -3, 'stop': 11, 'num': 15, 'base': 2.0}
kernels = ['rbf', 'sigmoid']

data_params = {'path': r'scans/output/PRE/', 'roi': 1, 'conds': [1, 3], 'block_length': 624}
grid_params = {'gamma': gamma_range, 'C': C_range, 'kernels': kernels}

inner_accs, outer_accs = train(data_params, grid_params, num_inner=1, rank_first=True)

In [414]:
path = r'scans/output/PRE/'
roi = 1                            # V1-roi: 0, MT-roi: 1
conds = [1, 3]                     # trained_cp: 0, trained_ip: 1, untrained_cp: 2, untrained_ip: 3
block_length = 624

gamma_range = {'start': -15, 'stop': 3, 'num': 19, 'base': 2.0}
C_range = {'start': -3, 'stop': 15, 'num': 19, 'base': 2.0}
kernels = ['rbf', 'sigmoid']

data_params = {'path': r'scans/output/PRE/', 'roi': 1, 'conds': [1, 3], 'block_length': 624}
grid_params = {'gamma': gamma_range, 'C': C_range, 'kernels': kernels}

inner_samples = []
outer_samples = []
for runs in range(10):
    print(f'On run {runs+1}.')
    
    inner_accs, outer_accs = train(data_params, grid_params, num_inner=1, rank_first=True, shuffle=True)
    inner_samples.append(df_to_arr(inner_accs))
    outer_samples.append(df_to_arr(outer_accs))


In [418]:
np.mean(inner_samples)

0.793102297008547

# Permutations

In [151]:
def permutation(data_params, grid_params, inner_dist, outer_dist, runs, classes=None):
    
    '''
    Runs several runs of SVM training and testing using shuffled (randomized) labeled data
    
    Parameters
    ----------
    data_params: dict
        path: str
            the path to the data files
        roi: int
            0 for V1 data, 1 for MT data
        conds: list
            list of integers specifying the conditional datasets to extract
            (0 for trained_cp, 1 for trained_ip, 2 for untrained_cp, 3 for untrained_ip)    
        block_length: int
            the number of voxels to standardize every block in the dataset to
    grid_params: dict
        kernels: list
            kernels to test (recommended options are 'linear', 'rbf', and 'sigmoid')
        gamma: dict
            dict that specifies the range of values of gamma to test; should include start, stop to range,
            num of values, and the exponential base
        C: dict
            dict that specifies the range of values of C to test; should include start, stop to range,
            num of values, and the exponential base
    inner_dist: list
        stores the inner accuracy test subject values
    outer_dist: list
        sotres the outer accuracy test subject values
    runs: int
        how many runs to perform
    classes: list
        the two different labels of data to use
    '''
    
    for n in range(runs):
        print(f'On run #{n+1}.')
        inner_accs, outer_accs = train(path, roi, conds, block_length, kernels, gamma_range, C_range, num_inner=1, scramble=True, classes=classes, rank_first=True)
        
        vals = []
        for column in inner_accs:
            vals.extend(inner_accs[column].tolist())
        vals = [x for x in vals if str(x) != 'nan']
        inner_dist.extend(vals)
        
        vals = []
        for column in outer_accs:
            vals.extend(outer_accs[column].tolist())
        vals = [x for x in vals if str(x) != 'nan']
        outer_dist.extend(vals)
        

In [189]:
path = r'scans/output/PRE/'
roi = 1                            # V1-roi: 0, MT-roi: 1
conds = [1, 3]                     # trained_cp: 0, trained_ip: 1, untrained_cp: 2, untrained_ip: 3
block_length = 624

gamma_range = {'start': -13, 'stop': 1, 'num': 32, 'base': 2.0}
C_range = {'start': -3, 'stop': 11, 'num': 32, 'base': 2.0}
kernels = ['rbf', 'sigmoid']
classes = ['trained_ip', 'untrained_ip']

inner_dist = []
outer_dist = []

permutation(path, roi, conds, block_length, kernels, gamma_range, C_range, inner_dist, outer_dist, 10, classes)

np.save('output/permutations/outer_dist32.npy', outer_dist)
np.save('output/permutations/inner_dist32.npy', inner_dist)

# Train with All

In [53]:
def train_with_all(path, roi, conds, block_length, kernels, gamma_range, C_range, scramble=False):
    
    '''
    Trains and tests the classifier for accuracy using entire dataset.
    
    Parameters
    ----------
    path: str
        the path to the data files
    roi: int
        0 for V1 data, 1 for MT data
    conds: list
        list of integers specifying the conditional datasets to extract
        (0 for trained_cp, 1 for trained_ip, 2 for untrained_cp, 3 for untrained_ip)    
    block_length: int
        the number of voxels to standardize every block in the dataset to
    kernels: list
        kernels to test (recommended options are 'linear', 'rbf', and 'sigmoid')
    gamma_range: dict
        dict that specifies the range of values of gamma to test; should include start, stop to range,
        num of values, and the exponential base
    C_range: dict
        dict that specifies the range of values of C to test; should include start, stop to range,
        num of values, and the exponential base
    scramble: boolean, optional
        whether or not to scramble the labels when training, default is False
        
    Returns
    -------
    DataFrame
        data of inner subject testing accuracy
    DataFrame
        data of outer subject testing accuracy
    '''
    
    subjects, suffix = get_subjects(path)
    
    inner_acc_report = pd.DataFrame(index=subjects, columns=subjects)
    outer_acc_report = pd.DataFrame(index=subjects, columns=subjects)
    
    for outer_subject in range(len(subjects)):
        
        print("Currently on outer subject #%i." % (outer_subject+1))

        start_time = time.time()
        
        for inner_subject in range(len(subjects)):

            if inner_subject == outer_subject:
                continue

            print("Currently on inner subject #%i." % (inner_subject+1))    
            x_train, y_train, x_test_inner, y_test_inner, x_test_outer, y_test_outer = generate_data(subjects, inner_subject, outer_subject, path, suffix, roi, conds, block_length)
            
            x_whole = np.vstack((x_train, x_test_inner, x_test_outer))
            y_whole = np.concatenate((y_train, y_test_inner, y_test_outer))
            
            if scramble:
                scramble_labels(y_train, classes)
                
            # gets optimal params for training dataset from grid search
            params, inner_acc = get_optimal_run(x_whole, y_whole, x_test_inner, y_test_inner, kernels, gamma_range, C_range) 
            print('Found best params for current inner subject.')
            
            # train model using optimal params for this set
            svclassifier = SVC(kernel=params['kernel'], gamma=params['gamma'], C=params['C'], max_iter=-1)
            svclassifier.fit(x_whole, y_whole)
            
            print('Testing outer subject...')
            outer_acc = svclassifier.score(x_test_outer, y_test_outer)
            
            # logs inner and outer subject accuracy data in dataframe
            index = subjects[outer_subject]
            col = subjects[inner_subject]
            
            inner_acc_report.at[index, col] = inner_acc
            outer_acc_report.at[index, col] = outer_acc

        clear_output()
        
        end_time = time.time()
        exec_time = end_time - start_time
        minutes = exec_time // 60
        seconds = exec_time % 60
        print('Last turn took %i minutes and %f seconds.' % (minutes, seconds))
    
    clear_output()
    return inner_acc_report, outer_acc_report

[2, 3, 1, 0]

In [None]:
path = r'scans/output/PRE/'
roi = 1                            # V1-roi: 0, MT-roi: 1
conds = [1, 3]                     # trained_cp: 0, trained_ip: 1, untrained_cp: 2, untrained_ip: 3
block_length = 624

gamma_range = {'start': -13, 'stop': 1, 'num': 32, 'base': 2.0}
C_range = {'start': -3, 'stop': 11, 'num': 32, 'base': 2.0}
kernels = ['rbf', 'sigmoid']

inner_accs, outer_accs = train_with_all(path, roi, conds, block_length, kernels, gamma_range, C_range)