 Add description here.

In [1]:
import matplotlib.pyplot as plt
from scipy.io import (loadmat, savemat)
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.metrics import confusion_matrix
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif, chi2
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.utils import class_weight
import numpy as np
from mne.datasets import sample
from mne.decoding import (SlidingEstimator, GeneralizingEstimator,
                          cross_val_multiscore, LinearModel, get_coef)
from sklearn.preprocessing import LabelEncoder            

In [22]:
def scorer(y_true, y_pred):
    # Probabilistic estimates are reported for each class. In our case 
    # `y_pred` shape is (n_trials, 2), where `y[:, 0] = 1 - y[:, 1]`.
    return roc_auc_score(y_true, y_pred[:, 1]) 

## This code is meant to work on WINDOWS. Giulia has a version that works on MAC. 
def load_cor(xDir, var_name='struct_cor'):
    import mne
    from mne import create_info
    from mne.epochs import EpochsArray
    import scipy.io as sio
    import numpy as np
    # load Matlab/Fieldtrip data
    mat = sio.loadmat(xDir, squeeze_me=True, struct_as_record=False)
    ft_data = mat[var_name]
    event = ft_data.trialinfo[:, 1]

    # convert to mne
    n_trial, n_chans, n_time = ft_data.trial.shape
    data = np.zeros((n_trial, n_chans, n_time))
    data = ft_data.trial

    sfreq = 200
    time = ft_data.time

    
    coi = range(n_chans)
    data = data[:, coi, :]
    chan_names = [l.encode('ascii') for l in ft_data.label[coi]]
    chan_types = ft_data.label[coi]
    chan_types[:] = 'eeg'
    info = create_info(chan_names, sfreq, chan_types)
    events = np.array([np.arange(n_trial), np.zeros(n_trial), event], int).T
    epochs = EpochsArray(data, info, events=events,
                         tmin=np.min(time), verbose=False)
    montage = mne.channels.read_montage('GSN-HydroCel-257')
    epochs.set_montage(montage)
    return epochs, ft_data.trialinfo


def plot_dimcomp(mean_scores):
    
    # scores are in the order of param_grid iteration, which is alphabetical
    mean_scores = mean_scores.reshape(len(C_OPTIONS), -1, len(N_FEATURES_OPTIONS))
    # select score for best C
    mean_scores = mean_scores.max(axis=0)
    bar_offsets = (np.arange(len(N_FEATURES_OPTIONS)) *
                   (len(reducer_labels) + 1) + .5)

    plt.figure()
    COLORS = 'bgrcmyk'
    for i, (label, reducer_scores) in enumerate(zip(reducer_labels, mean_scores)):
        plt.bar(bar_offsets + i, reducer_scores, label=label, color=COLORS[i])

    plt.title("Comparing feature reduction techniques")
    plt.xlabel('Reduced number of features')
    plt.xticks(bar_offsets + len(reducer_labels) / 2, N_FEATURES_OPTIONS)
    plt.ylabel('Digit classification AUC')
    plt.ylim((0, 1))
    plt.legend(loc='upper left')
    

def grid_dim_red(mainPath, name, varname, N_FEATURES_OPTIONS, C_OPTIONS):

    print('working on subject ' + name)
    
    filePath = mainPath + name + '\\'
    #loading labels for conditions
    yDir = filePath + 'trl_conditions.mat'
    Y = loadmat(yDir)
    conditions = Y['trl_conditions']
    Y = conditions.transpose().ravel()
    Y[Y==-1] = 0
    
    #loading data as epoch object
    print('loading data...')
    xDir = filePath + varname + '.mat'
    epochs, _ = load_cor(xDir, var_name=varname)
    
    #Retrieving data as matrix
    data = epochs.get_data()
    X = np.reshape(data, [data.shape[0], data.shape[1]*data.shape[2]])
    ## Making pipeline

    # Set up possible values of parameters to optimize over
    print('defining reduction techniques')
    param_grid = [
        {
            'reduce_dim': [PCA(iterated_power=7)],
            'reduce_dim__n_components': N_FEATURES_OPTIONS,
            'classify__C': C_OPTIONS
        },  
        {
            'reduce_dim': [SelectKBest(f_classif)],
            'reduce_dim__k': N_FEATURES_OPTIONS,
            'classify__C': C_OPTIONS
        },  
        {
             'reduce_dim': [MiniBatchKMeans()],
            'reduce_dim__n_clusters': N_FEATURES_OPTIONS,
            'classify__C': C_OPTIONS
        },     
        
    ]
    reducer_labels = ['PCA', 'KBest(f_classif)', 'Clustering (K-means)']
    
    
    pipe = Pipeline([
        ('scaling', StandardScaler()),
        ('reduce_dim', SelectKBest(f_classif)),
        ('classify',  SVC(class_weight='balanced', probability=False, kernel='linear'))
    ])
    
    
    # Defining cv folds parameter
    inner_cv = StratifiedKFold(n_splits=5, shuffle=True)
    
    print('training gridsearch')
    grid = GridSearchCV(pipe, cv=inner_cv, n_jobs=1, param_grid=param_grid, scoring='roc_auc')
    
    print('saving results')
    reduction_results = np.array(grid.cv_results_['mean_test_score'])
    np.save(filePath + 'reduction_results', reduction_results) 
    return grid, reduction_results


In [None]:
subject = os.listdir('C:\\Users\\Ana\\Desktop\\CI\\Python\\Subjects')
subject = np.sort(subject)

In [19]:
N_FEATURES_OPTIONS = [10, 20, 30, 40, 50, 60, 10]
C_OPTIONS = [1]

grid, reduction_results = grid_dim_red('C:\\Users\\Ana\\Desktop\\CI\\Python\\Subjects\\', 'DB171120v03HT', 'struct_cor', N_FEATURES_OPTIONS, C_OPTIONS)


working on subject DB171120v03HT
loading data...
defining reduction techniques
training gridsearch
saving results


AttributeError: 'GridSearchCV' object has no attribute 'cv_results_'

In [23]:
 grid = GridSearchCV(pipe, cv=inner_cv, n_jobs=1, param_grid=param_grid, scoring='roc_auc')
   

NameError: name 'pipe' is not defined