### **Analysis of Classifiers**
Date: Mar 9, 2019 | Version: 2.1

Contains code for event-related analysis.

#### **Code**

In [2]:
import os
import pandas as pd
import numpy as np
import nipy
from mvpa2.suite import * # Production code should import functions individually.

#### Load Data

In [4]:
cwd = '/home/fmri-data'
data_path = os.path.join(cwd, "mvpa_1")

# Load the sample attributes.
attr_fname = "targets_attr_mvpa_1.txt"
attr_path = os.path.join(data_path, attr_fname)
# PyMVPA convenience function to read attribute files. This line must be
# active: `attr.targets` and `attr.chunks` are passed to fmri_dataset() below,
# and leaving it commented out raises NameError on a fresh kernel.
attr = SampleAttributes(attr_path)

# Load all 10 runs from one subject into one fMRI dataset.
file_path_list = list()  # file path to each run, in run order
for i in range(1, 11):
    run_name = "run" + str(i)
    fname = run_name + "_native.nii.gz"
    filepath = os.path.join(data_path, fname)
    print(filepath)
    file_path_list.append(filepath)

# fMRI dataset format is convenient and vital for using PyMVPA.
ds = fmri_dataset(file_path_list, targets=attr.targets, chunks=attr.chunks)

/home/fmri-data/mvpa_1/run1_native.nii.gz
/home/fmri-data/mvpa_1/run2_native.nii.gz
/home/fmri-data/mvpa_1/run3_native.nii.gz
/home/fmri-data/mvpa_1/run4_native.nii.gz
/home/fmri-data/mvpa_1/run5_native.nii.gz
/home/fmri-data/mvpa_1/run6_native.nii.gz
/home/fmri-data/mvpa_1/run7_native.nii.gz
/home/fmri-data/mvpa_1/run8_native.nii.gz
/home/fmri-data/mvpa_1/run9_native.nii.gz
/home/fmri-data/mvpa_1/run10_native.nii.gz


In [3]:
# Feature selector: score features with OneWayAnova() and keep the 500
# elements from the upper tail of the scores.
fsel = SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FixedNElementTailSelector(500, mode='select', tail='upper'))

In [4]:
# NOTE(review): the selector is trained on the full dataset (all chunks, with
# targets) before any cross-validation split, so the accuracies reported later
# are not independent of feature selection. Consider refitting the selection
# inside each fold.
# NOTE: `ds` is overwritten, so this cell is not idempotent; re-running it
# operates on the already-reduced dataset.
fsel.train(ds)
ds = fsel(ds)  # free up some memory by overwriting
print(ds.shape)  # notice voxel count is much lower

(1900, 500)


  f = msb / msw
  f = msb / msw


In [5]:
# Polynomial detrending of order 1, grouped by the 'chunks' sample attribute.
# The return value is not captured; presumably `ds` is modified in place —
# TODO confirm against the PyMVPA documentation.
poly_detrend(ds,polyord=1,chunks_attr='chunks')

  fit = np.linalg.lstsq(regs, ds.samples)


Convert continuous time series to list of events

In [6]:
# Build the event list from the per-sample targets and chunks. Each event is a
# dict with 'chunks', 'duration', 'onset' and 'targets' keys (see the peek below).
events_orig = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)

A peek into what the list looks like:

In [7]:
# Show the first five events. The stray "..." continuation prompt that was
# pasted into this cell has been removed so the cell is plain, valid Python.
for e in events_orig[:5]:
    print(e)

{'chunks': 0.0, 'duration': 1, 'onset': 0, 'targets': 'baseline_catch_all'}
{'chunks': 0.0, 'duration': 1, 'onset': 1, 'targets': 'target_miss'}
{'chunks': 0.0, 'duration': 8, 'onset': 2, 'targets': 'baseline_catch_all'}
{'chunks': 0.0, 'duration': 1, 'onset': 10, 'targets': 'target_hit'}
{'chunks': 0.0, 'duration': 5, 'onset': 11, 'targets': 'baseline_catch_all'}


In [8]:
# Give every event the same fixed duration of 5. At this point durations are
# still in the units returned by find_events; they are multiplied by TR in the
# next cell. This mutates the dicts in `events_orig` in place.
# NOTE(review): 5 is a magic number — document why this window was chosen.
for ev in events_orig:
    ev['duration'] = 5

In [9]:
# Estimate the repetition time as the median spacing between consecutive
# time coordinates, then rescale every event's onset and duration by it.
# NOTE: the events are scaled in place, so running this cell twice applies
# the TR factor twice. Re-create `events_orig` before re-running.
TR = np.median(np.diff(ds.sa.time_coords))
for ev in events_orig:
    ev['onset'] *= TR
    ev['duration'] *= TR

#### Subset event list
Create a subset for binary classification and another for multi-class classification

In [10]:
# Binary subset: keep only target trials (hit vs. miss).
events_binary = [
    ev for ev in events_orig
    if ev['targets'] in ('target_hit', 'target_miss')
]

In [11]:
# Multi-class subset: the two target outcomes plus the two lure outcomes.
events_multi = [
    ev for ev in events_orig
    if ev['targets'] in ('target_hit', 'target_miss',
                         'lure_false_alarm', 'lure_correct_rejection')
]

In [12]:
# checking what labels are in the dataset
ev_list = []
for ev in events_multi:
    ev_list.append(ev['targets'])

In [13]:
# Unique labels in the multi-class subset and how many events carry each one.
np.unique(ev_list, return_counts=True)

(array(['lure_correct_rejection', 'lure_false_alarm', 'target_hit',
        'target_miss'], dtype='|S22'), array([116,  24, 133,   7]))

#### Get GLM parameter estimates

In [14]:
# Both event subsets are modelled with identical settings, so the shared
# keyword arguments are written once and reused for each fit.
_hrf_fit_kwargs = dict(
    time_attr='time_coords',
    condition_attr=('onset', 'targets', 'chunks'),
)

evds_binary = fit_event_hrf_model(ds, events_binary, **_hrf_fit_kwargs)

evds_multi = fit_event_hrf_model(ds, events_multi, **_hrf_fit_kwargs)

  n_hr)
  time_stamps = np.linspace(0, time_length, float(time_length) / dt)


In [15]:
# Shape of the multi-class event dataset.
evds_multi.shape

(280, 500)

In [16]:
# Shape of the binary event dataset.
evds_binary.shape

(140, 500)

In [17]:
# Z-score both event datasets with chunks_attr=None. The return values are not
# captured; presumably the datasets are normalized in place, across all samples
# rather than per chunk — TODO confirm against the PyMVPA zscore documentation.
zscore(evds_binary, chunks_attr=None)
zscore(evds_multi, chunks_attr=None)

#### Multi-class classification

#### Logistic Regression (Multi-class classification)

In [18]:
# Import from the public package path (as the LASSO section of this notebook
# does) instead of the private `sklearn.linear_model.logistic` module, which
# is not part of the public API and is unavailable in newer scikit-learn.
from sklearn.linear_model import LogisticRegression

In [19]:
# Multinomial logistic regression fitted with the lbfgs solver.
base_clf_skl_logreg = LogisticRegression(multi_class='multinomial',solver='lbfgs')
# NOTE(review): lbfgs is chosen explicitly because the default solver
# (liblinear, in the scikit-learn version used here) presumably does not
# support the multinomial loss — confirm against the scikit-learn docs.

# Wrap the scikit-learn estimator so it can be passed to PyMVPA's CrossValidation.
skl_logreg_clf=SKLLearnerAdapter(base_clf_skl_logreg)

In [20]:
# Cross-validation over the partitions produced by NFoldPartitioner(). Each
# fold is scored with mean_match_accuracy, and the 'stats' conditional
# attribute is enabled so the confusion matrix can be read afterwards.
cv_logreg_skl = CrossValidation(
    skl_logreg_clf,
    NFoldPartitioner(),
    errorfx=mean_match_accuracy,
    enable_ca=['stats'],
)

In [21]:
# Run the logistic-regression cross-validation on the multi-class events.
cv_logreg_skl_results = cv_logreg_skl(evds_multi)
print(np.mean(cv_logreg_skl_results))  # mean accuracy across folds
print(cv_logreg_skl.ca.stats.matrix)  # confusion matrix

  if not np.issubdtype(attr.dtype, str) and not self.mapnumeric:


0.40714285714285714
[[55 14 71  3]
 [ 2  0  3  0]
 [59 10 59  4]
 [ 0  0  0  0]]


  stats['PPV'] = stats['TP'] / (1.0*stats["P'"])
  stats['FDR'] = stats['FP'] / (1.0*stats["P'"])
  for m in mat_all]
  for m in mat_all]


#### LASSO Multinomial Logistic Regression (Multi-class classification)

In [22]:
from sklearn.linear_model import LogisticRegression

In [23]:
# L1-penalised (LASSO) multinomial logistic regression fitted with the saga
# solver, with a raised iteration cap (max_iter=10000).
skl_lasso = LogisticRegression(
    C=0.5,
    penalty='l1',
    multi_class='multinomial',
    solver='saga',
    max_iter=10000,
)
# Wrap for PyMVPA and build the cross-validation, enabling 'stats' so the
# confusion matrix is available after a run.
wrapped_lasso_clf = SKLLearnerAdapter(skl_lasso)
cv_lasso = CrossValidation(
    wrapped_lasso_clf,
    NFoldPartitioner(),
    errorfx=mean_match_accuracy,
    enable_ca=['stats'],
)

In [24]:
# Run the LASSO cross-validation on the multi-class events.
cv_lasso_results = cv_lasso(evds_multi)
print(np.mean(cv_lasso_results))  # mean accuracy across folds
# Last expression of the cell: shown as the cell's output (confusion matrix).
cv_lasso.ca.stats.matrix

0.41785714285714287


array([[61, 11, 72,  2],
       [ 2,  1,  6,  1],
       [53, 12, 55,  4],
       [ 0,  0,  0,  0]])

#### PyMVPA KNN Classifier (Multi-class classification)

In [25]:
# PyMVPA k-nearest-neighbours classifier: 5 neighbours, one_minus_correlation
# as the distance function, and majority voting.
clfKNN_mvpa = mvpa2.clfs.knn.kNN(k=5, dfx=one_minus_correlation, voting='majority')

In [26]:
# Cross-validation for the PyMVPA kNN classifier; 'stats' is enabled so the
# confusion matrix can be read after a run.
cv_knn_mvpa = CrossValidation(
    clfKNN_mvpa,
    NFoldPartitioner(),
    errorfx=mean_match_accuracy,
    enable_ca=['stats'],
)

In [27]:
# Run the PyMVPA kNN cross-validation on the multi-class events.
cv_knn_mvpa_results = cv_knn_mvpa(evds_multi)
print(np.mean(cv_knn_mvpa_results))  # mean accuracy across folds
print(cv_knn_mvpa.ca.stats.matrix)  # confusion matrix

0.4357142857142858
[[52 13 61  3]
 [ 6  0  1  1]
 [58 11 70  3]
 [ 0  0  1  0]]


#### Scikit-learn KNN Classifier (Multiclass classification)

In [28]:
from sklearn import neighbors

In [29]:
k_neighbors = 5

# Uniform weights give every neighbour an equal vote, i.e. a simple majority
# decision; distances are Euclidean.
clf = neighbors.KNeighborsClassifier(
    n_neighbors=k_neighbors,
    weights='uniform',
    metric='euclidean',
)
# Wrap the scikit-learn estimator so PyMVPA's CrossValidation can drive it.
wrapped_clf = SKLLearnerAdapter(clf)

In [30]:
# Cross-validation for the scikit-learn kNN classifier; 'stats' is enabled so
# the confusion matrix can be read after a run.
cv_knn_skl = CrossValidation(
    wrapped_clf,
    NFoldPartitioner(),
    errorfx=mean_match_accuracy,
    enable_ca=['stats'],
)

In [31]:
# Run the scikit-learn kNN cross-validation on the multi-class events.
cv_knn_skl_results = cv_knn_skl(evds_multi)
print(np.mean(cv_knn_skl_results))  # mean accuracy across folds
print(cv_knn_skl.ca.stats.matrix)  # confusion matrix

0.43571428571428567
[[82 17 92  1]
 [ 1  1  2  0]
 [33  6 39  6]
 [ 0  0  0  0]]


#### Scikit-learn Linear Kernel C-SVM (Multi-class classification)

In [32]:
from sklearn.svm import LinearSVC

In [33]:
# Linear support vector classifier with regularisation parameter C=1.
base_skl_lsvc_clf = LinearSVC(C=1) 
# TODO: for multi-class use, compare multi-class strategies (LinearSVC exposes
#       a `multi_class` option; one-vs-one would need a different SVM class —
#       confirm against the scikit-learn docs).
# TODO: tune C, tolerance, etc. Note LinearSVC is linear-only, so trying
#       other kernels means switching to a kernel SVM.
# TODO: also test different variants of SVM.

# Wrap the scikit-learn estimator so PyMVPA's CrossValidation can drive it.
skl_lsvc_clf = SKLLearnerAdapter(base_skl_lsvc_clf)

In [34]:
# Cross-validation for the linear SVM; 'stats' is enabled so the confusion
# matrix can be read after a run.
cv_lsvc_skl = CrossValidation(
    skl_lsvc_clf,
    NFoldPartitioner(),
    errorfx=mean_match_accuracy,
    enable_ca=['stats'],
)

In [35]:
# Run the linear-SVM cross-validation on the multi-class events.
cv_lsvc_skl_results = cv_lsvc_skl(evds_multi)
print(np.mean(cv_lsvc_skl_results))  # mean accuracy across folds
print(cv_lsvc_skl.ca.stats.matrix)  # confusion matrix

0.2571428571428571
[[34  6 32  2]
 [37  6 39  1]
 [31  5 29  1]
 [14  7 33  3]]


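#### Binary Classification

**Caveat:** the binary subset is heavily imbalanced: 133 `target_hit` vs 7 `target_miss` events, per the label counts above. A classifier that always predicts `target_hit` therefore already reaches 133/140 = 0.95 accuracy. Read the confusion matrices below rather than the mean accuracy alone.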

In [36]:
# Logistic regression on the binary events. This reuses the cross-validation
# object and the result name from the multi-class run, replacing those results.
cv_logreg_skl_results = cv_logreg_skl(evds_binary)
print(np.mean(cv_logreg_skl_results))  # mean accuracy across folds
print(cv_logreg_skl.ca.stats.matrix)  # confusion matrix

0.95
[[133   7]
 [  0   0]]


  stats['NPV'] = stats['TN'] / (1.0*stats["N'"])
  ([0], np.cumsum(~t)/(~t).sum(dtype=np.float), [1]))
  ([0], np.cumsum(t)/t.sum(dtype=np.float), [1]))


In [37]:
# LASSO logistic regression on the binary events. This reuses the
# cross-validation object and result name from the multi-class run.
cv_lasso_results = cv_lasso(evds_binary)
print(np.mean(cv_lasso_results))  # mean accuracy across folds
print(cv_lasso.ca.stats.matrix)  # confusion matrix

0.95
[[133   7]
 [  0   0]]


In [38]:
# PyMVPA kNN on the binary events. This reuses the cross-validation object
# and result name from the multi-class run.
cv_knn_mvpa_results = cv_knn_mvpa(evds_binary)
print(np.mean(cv_knn_mvpa_results))  # mean accuracy across folds
print(cv_knn_mvpa.ca.stats.matrix)  # confusion matrix

0.95
[[133   7]
 [  0   0]]


In [39]:
# scikit-learn kNN on the binary events. This reuses the cross-validation
# object and result name from the multi-class run.
cv_knn_skl_results = cv_knn_skl(evds_binary)
print(np.mean(cv_knn_skl_results))  # mean accuracy across folds
print(cv_knn_skl.ca.stats.matrix)  # confusion matrix

0.95
[[133   7]
 [  0   0]]


In [40]:
# Linear SVM on the binary events. This reuses the cross-validation object
# and result name from the multi-class run.
# NOTE(review): the recorded accuracy for this cell is far below the other
# classifiers on the same data. Verify how labels are handled for this
# classifier before trusting the number.
cv_lsvc_skl_results = cv_lsvc_skl(evds_binary)
print(np.mean(cv_lsvc_skl_results))  # mean accuracy across folds
print(cv_lsvc_skl.ca.stats.matrix)  # confusion matrix

0.09285714285714283
[[  6   0]
 [127   7]]
