# Build the Classifier
This notebook is dedicated to creating classifiers and running the classification analyses of interest on neuroimaging data.

Can we accurately classify:
- adults vs. children
- condition within adults
- condition within children

In [None]:
from pandas import DataFrame, Series, read_csv

# Study specific variables
# All paths are plain strings rooted at `study_home`; later cells extend them
# with `+`, so they are intentionally not converted to Path objects.
study_home = '/home/camachocm2/Analysis/KidVid_MVPA'
standard_mask = study_home + '/template/MNI152_T1_2mm_brain_mask_KV.nii.gz'
sub_data_file = study_home + '/doc/subjectinfo.csv'
preproc_dir = study_home + '/analysis/preproc/betas'
output_dir = study_home + '/analysis/classifier'

# Condition table for a single subject; its 'labels' column is the list of
# condition names that gets repeated for every subject further down.
conditions = read_csv(study_home + '/doc/conditionslist.csv')
# Fixed: this previously read from the undefined name `condition_list`.
condition_labels = conditions['labels'].tolist()

# Subject IDs come from the 'subjID' column of the subject info sheet.
subject_info = read_csv(sub_data_file)
subjects_list = subject_info['subjID'].tolist()

In [None]:
## Create a conditions list for the feature set
# Result: one row per (subject, condition) pair, with the condition name in
# 'labels' and the owning subject ID in 'subject'.
from pandas import concat

# The frame loaded from the CSV becomes the rows of the first subject.
conditions['subject'] = Series(subjects_list[0], index=conditions.index)

# Build one frame per remaining subject and concatenate once at the end.
# `DataFrame.append` was removed in pandas 2.0, and appending inside the loop
# re-copied the growing frame on every iteration.
per_subject_frames = [conditions]
for sub in subjects_list[1:]:
    temp = DataFrame({'labels': condition_labels})
    temp['subject'] = sub
    per_subject_frames.append(temp)

conditions = concat(per_subject_frames, ignore_index=True)

conditions.describe()

In [None]:
## Temporally concatenate all the parameter estimates from preproc to create a feature set
from glob import glob
from nipype.interfaces.fsl.utils import Merge

# Collect every subject's beta image; sorting makes the stacking order
# deterministic (glob itself returns paths in arbitrary order).
# NOTE(review): the rows of `conditions` follow the order of `subjects_list`,
# while these files follow sorted path order - confirm the two line up.
files = sorted(glob(preproc_dir + '/*/betas.nii.gz'))

bold_feature_data = output_dir + '/merged_features.nii.gz'

# Stack all inputs along the 't' dimension into a single file.
merge = Merge(in_files=files, dimension='t', merged_file=bold_feature_data)
merge.run()

## Perform the actual support vector classification

In [None]:
# Perform the support vector classification
from nilearn.input_data import NiftiMasker
from sklearn.svm import SVC
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.pipeline import Pipeline

# Apply the study brain mask to the merged image to obtain the feature matrix
# X; signals are standardized and no spatial smoothing is applied.
masker = NiftiMasker(mask_img=standard_mask, smoothing_fwhm=None,
                     standardize=True, memory="nilearn_cache", memory_level=1)
X = masker.fit_transform(bold_feature_data)

# Target vector: the condition label of each row. Fixed: the whole
# `conditions` DataFrame (including its 'subject' column) used to be passed
# as the target, which is not a valid set of class labels.
y = conditions['labels']

# Pipeline: keep the top 5 percent of features ranked by ANOVA F-score,
# then classify with a linear-kernel SVM.
feature_selection = SelectPercentile(f_classif, percentile=5)
svc = SVC(kernel='linear')
anova_svc = Pipeline([('anova', feature_selection), ('svc', svc)])

anova_svc.fit(X, y)
# Predictions on the same data the model was fit on (in-sample), so these are
# not an estimate of out-of-sample accuracy.
y_pred = anova_svc.predict(X)

## Obtain prediction values via cross validation

In [None]:
# Obtain prediction values via cross validation
# `sklearn.cross_validation` (and `LeaveOneLabelOut`) were removed in
# scikit-learn 0.20; `model_selection.LeaveOneGroupOut` is the replacement.
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score

# Targets are the condition labels; groups are subject IDs, so every fold
# holds out all rows belonging to one subject.
# Fixed: the grouping previously referenced the undefined name `session`, and
# the whole `conditions` DataFrame was passed as the target.
y = conditions['labels']
groups = conditions['subject']

cv = LeaveOneGroupOut()
cv_scores = cross_val_score(anova_svc, X, y, groups=groups, cv=cv)
classification_accuracy = cv_scores.mean()

# Chance level is 1 / number of distinct labels (assumes balanced classes -
# TODO confirm). Fixed: a DataFrame has no `.unique()`; use the label column.
print("Classification accuracy: %.4f / Chance level: %f" %
      (classification_accuracy, 1. / len(y.unique())))

## Visualize the SVM weights