<font size="+12"><center>
    MVPA analysis: SVM
</font></center>

Author:
Egor Ananyev

# Preparation

## Loading packages

In [1]:
%matplotlib qt
import numpy as np
import matplotlib.pyplot as plt
import mne
import pandas as pd
import os

In [2]:
mne.set_log_level('warning')  # less verbose output

## Setting parameters

In [3]:
interactive = False  # Whether to render interactive figures such as channel plots
# ...should be set to True on first pass

debug = False

cur_subj = 1
numof_runs = 7  # TEMP; is 7 for subj 01

## Loading evoked data

In [4]:
# data_path = os.path.expanduser("C:\\Users\\egora\\Downloads\\meg\\")
data_path = os.path.expanduser('E:\\meg\\')

cur_subj_str = str(cur_subj).zfill(2)

for cur_run in range(numof_runs):
    cur_run_str = str(cur_run+1).zfill(2)
    fname = os.path.join(data_path,
                             'derivatives\\meg_derivatives\\sub-' + cur_subj_str + '\\ses-meg\\meg\\sub-' + \
                             cur_subj_str + '_ses-meg_experimental_run-' + cur_run_str + '_proc-sss_300_epo.fif')
    print(fname)
    epochs_run = mne.read_epochs(fname)
    # example append: https://www.programcreek.com/python/example/92634/mne.Epochs
    epochs_run = epochs_run.pick_types(meg=True)
    epochs_run_df = epochs_run.to_data_frame()
    # Normalizing the signal by the baseline:
    epochs_run_std = epochs_run_df.sort_index(level=['condition', 'epoch', 'time'], ascending=[1, 1, 1])
    epochs_run_std = epochs_run_std.loc[pd.IndexSlice[:, :, -100:-1], :].groupby(['condition', 'epoch']).std()
    epochs_run_norm = epochs_run_df / epochs_run_std
    if cur_run == 0:
        epochs_df = epochs_run_norm
    else: 
        epochs_df = epochs_df.append(epochs_run_norm)

E:\meg\derivatives\meg_derivatives\sub-01\ses-meg\meg\sub-01_ses-meg_experimental_run-01_proc-sss_300_epo.fif
E:\meg\derivatives\meg_derivatives\sub-01\ses-meg\meg\sub-01_ses-meg_experimental_run-02_proc-sss_300_epo.fif
E:\meg\derivatives\meg_derivatives\sub-01\ses-meg\meg\sub-01_ses-meg_experimental_run-03_proc-sss_300_epo.fif
E:\meg\derivatives\meg_derivatives\sub-01\ses-meg\meg\sub-01_ses-meg_experimental_run-04_proc-sss_300_epo.fif
E:\meg\derivatives\meg_derivatives\sub-01\ses-meg\meg\sub-01_ses-meg_experimental_run-05_proc-sss_300_epo.fif
E:\meg\derivatives\meg_derivatives\sub-01\ses-meg\meg\sub-01_ses-meg_experimental_run-06_proc-sss_300_epo.fif
E:\meg\derivatives\meg_derivatives\sub-01\ses-meg\meg\sub-01_ses-meg_experimental_run-07_proc-sss_300_epo.fif


# Support Vector Machines

In [18]:
from sklearn import svm

In [9]:
all_times = np.round(epochs_run.times * 1000).astype(int)
# print(all_times)
print(np.shape(all_times))

(211,)


# Cross-validation

In [20]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [11]:
cv_times = all_times[30:76]   # np.round(epochs_run.times * 1000).astype(int)
cv_times

array([  0,   3,   7,  10,  13,  17,  20,  23,  27,  30,  33,  37,  40,
        43,  47,  50,  53,  57,  60,  63,  67,  70,  73,  77,  80,  83,
        87,  90,  93,  97, 100, 103, 107, 110, 113, 117, 120, 123, 127,
       130, 133, 137, 140, 143, 147, 150])

In [12]:
tuned_parameters = [
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    {'C': [1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.001, 0.0001]},
    {'C': [1, 10, 100, 1000], 'kernel': ['poly'], 'degree': [2, 3], 'gamma': [0.001, 0.0001]},
    {'C': [1, 10, 100, 1000], 'kernel': ['sigmoid'], 'gamma': [0.001, 0.0001]},
]

In [13]:
# Taking values for a single timepoint for now:
this_t = 100
X_right = epochs_df[epochs_df.index.get_level_values('condition').str.contains('right')].loc[pd.IndexSlice[:, :, this_t], :]
X_left = epochs_df[epochs_df.index.get_level_values('condition').str.contains('left')].loc[pd.IndexSlice[:, :, this_t], :]
X = np.concatenate((X_right, X_left))
y = np.concatenate((np.repeat(1, len(X_right)), np.repeat(0, len(X_left))))

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [15]:
np.shape(X_train)

(805, 306)

In [21]:
np.shape(X_test)

(202, 306)

In [None]:
scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(
        svm.SVC(cache_size=2000), tuned_parameters, scoring='%s_macro' % score
    )
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Best parameters set found on development set:

{'C': 1, 'gamma': 0.001, 'kernel': 'sigmoid'}

Grid scores on development set:

0.480 (+/-0.043) for {'C': 1, 'kernel': 'linear'}
0.509 (+/-0.032) for {'C': 10, 'kernel': 'linear'}
0.496 (+/-0.069) for {'C': 100, 'kernel': 'linear'}
0.515 (+/-0.075) for {'C': 1000, 'kernel': 'linear'}
0.492 (+/-0.053) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.464 (+/-0.051) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.494 (+/-0.058) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.501 (+/-0.065) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.497 (+/-0.069) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.488 (+/-0.043) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.497 (+/-0.069) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.509 (+/-0.067) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.498 (+/-0.080) for {'C': 1, 'degree': 2, 'gamma': 0.001, 'kernel': 'poly'}
0.253 (+/-0.003) for {'C': 1, 'degree': 2, 'gamma': 0.0001, 'kernel': 