In [1]:
from __future__ import division
import os
from os.path import join
import numpy as np
import pandas as pd

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Subject identifier and root data directory. Later cells join these two
# values to locate this subject's files, e.g. join(dataDir, subj, ...).
subj = '13034'
dataDir = '../data'

# Classification 
Classification analyses on one subject. This notebook walks through the steps for preparing the data and setting up a classification.

This experiment was a single run task with 48 trials. On each trial, subjects were presented with a stimulus in the form of either a **Word** or a **Picture** (24 stims from each modality).

Stimuli represented either **Dwellings** or **Tools** (equally balanced across modalities). 

There were 8 unique stimuli (4 dwellings and 4 tools), each presented both as a word and as a picture.

**Stimuli Breakdown:**

* 24 Words
    * 12 Dwellings
        * 4 stims (as words), repeated 3x each
    * 12 Tools 
        * 4 stims (as words), repeated 3x each
* 24 Pics
    * 12 Dwellings
        * 4 stims (as pics), repeated 3x each
    * 12 Tools
        * 4 stims (as pics), repeated 3x each


The analyses will focus on classifying between different combinations of these stimulus representations (e.g., classifying between words and pics).

# Preprocessing

---
## Prepare the attributes
Prepare a set of volume labels that can be used to slice the dataset up for different analyses.  

In [3]:
# Locate this subject's trial-onsets table, load it with pd.read_table's
# default settings, and preview the first few rows.
onsets_file = subj + '_trialOnsets.txt'
onsets_path = join(dataDir, subj, onsets_file)
trialOnsets = pd.read_table(onsets_path)
trialOnsets.head()

Unnamed: 0,TrialOnset,Modality,Category,Stimulus
0,2000,Word,dwelling,castle
1,11995,Word,tool,saw
2,22007,Word,dwelling,house
3,32002,Word,dwelling,tent
4,41980,Word,tool,screwdriver


In [4]:
# Onset times are not used as sample labels, so build the label table from
# every column except TrialOnset, then check its dimensions.
unused_cols = ['TrialOnset']
trials = trialOnsets.drop(unused_cols, axis=1)
trials.shape

(48, 3)

### Make additional columns for category by word/pic and stim by word/pic

In [5]:
def _labelForModality(row, modality, column, suffix):
    """Build a modality-specific label for one trial.

    Shared implementation behind catByWord, catByPic, stimByWord and
    stimByPic, which differ only in the three values passed here.

    Parameters
    ----------
    row : mapping-like
        A single trial, indexable by 'Modality' and by ``column``
        (e.g. a DataFrame row passed by ``DataFrame.apply(..., axis=1)``).
    modality : str
        Value of ``row['Modality']`` for which a real label is produced.
    column : str
        Key whose value forms the stem of the label.
    suffix : str
        Text appended to the stem.

    Returns
    -------
    str
        ``row[column] + suffix`` when the trial's modality matches,
        otherwise the placeholder ``'n'``.
    """
    # ``row[column]`` is only read for matching trials, as in the original code.
    if row['Modality'] == modality:
        return row[column] + suffix
    return 'n'


def catByWord(row):
    """Return the category label for a Word trial (e.g. 'dwellingWord'), else 'n'."""
    return _labelForModality(row, 'Word', 'Category', 'Word')


def catByPic(row):
    """Return the category label for a Picture trial (e.g. 'toolPic'), else 'n'."""
    return _labelForModality(row, 'Picture', 'Category', 'Pic')


def stimByWord(row):
    """Return the stimulus label for a Word trial (e.g. 'castleWord'), else 'n'."""
    return _labelForModality(row, 'Word', 'Stimulus', 'Word')


def stimByPic(row):
    """Return the stimulus label for a Picture trial (e.g. 'sawPic'), else 'n'."""
    return _labelForModality(row, 'Picture', 'Stimulus', 'Pic')

In [6]:
# Derive one label column per labelling function, applied row by row.
# Trials from the other modality receive the placeholder 'n' that the
# labelling functions return.
label_columns = [
    ('categoryWords', catByWord),
    ('categoryPics', catByPic),
    ('stimulusWords', stimByWord),
    ('stimulusPics', stimByPic),
]
for new_col, label_fn in label_columns:
    trials[new_col] = trials.apply(label_fn, axis=1)

Here is the table that contains all of our sample labels

In [7]:
# Preview the first rows of the label table, including the derived
# categoryWords / categoryPics / stimulusWords / stimulusPics columns.
trials.head()

Unnamed: 0,Modality,Category,Stimulus,categoryWords,categoryPics,stimulusWords,stimulusPics
0,Word,dwelling,castle,dwellingWord,n,castleWord,n
1,Word,tool,saw,toolWord,n,sawWord,n
2,Word,dwelling,house,dwellingWord,n,houseWord,n
3,Word,dwelling,tent,dwellingWord,n,tentWord,n
4,Word,tool,screwdriver,toolWord,n,screwdriverWord,n


## Load the subject's brain data

For this analysis, the functional data has been preprocessed with the following steps:

* motion correction
* non-brain removal with BET (along with func mask creation)
* 4mm smoothing
* highpass filtered (Gaussian least-squares straight-line fitting, sigma=40.0s)

In addition, we ran first-level GLMs to extract statistical parametric maps (SPMs) representing single-trial parameter estimates. For each trial, a model was fit with two regressors: 1) a convolved regressor representing THAT trial, and 2) a convolved regressor representing ALL OTHER trials. The unique parameter estimate map for each trial was extracted, and the maps were merged across time to produce a 4D file. There are 48 'timepoints' in this 4D file, each representing the whole-brain parameter map for a single trial.

The 4D single-trial parameter maps will be used as the input dataset for our classification.

In [8]:
# import tools to read nifti data
import nilearn as nl
import nibabel as nib

In [9]:
# Per-subject directory and the two image files used by the masker below:
#   bold_file - the 4D file of single-trial parameter-estimate maps
#   mask_file - the mask image handed to NiftiMasker as mask_img
subj_dataDir = join(dataDir, subj)
bold_file = join(subj_dataDir, 'singleTrialGLM/singleTrialPEs.nii.gz')
mask_file = join(subj_dataDir, 'masks/TDSL2_brain_mask.nii.gz')

In [10]:
# create a Nilearn Masker...this will convert the dataset to a 2D array
# as well as standardize the voxel timeseries
from nilearn.input_data import NiftiMasker

In [11]:
# Build a masker from the mask image and apply it to the 4D file, turning
# the image into a 2D array. standardize=True asks nilearn to standardize
# each voxel's values across the 48 volumes.
masker = NiftiMasker(mask_img=mask_file, standardize=True)
fmri_masked = masker.fit_transform(bold_file)
# The recorded output is (48, 25182): 48 rows match the 48 trials; the
# columns are presumably the in-mask voxels -- TODO confirm.
fmri_masked.shape

(48, 25182)

# Start Classifying

### Start simple - whole brain SVM on Words vs Pics
See how well an SVM can discriminate between WORD trials and PIC trials using the entire brain volume as input.

In [15]:
# load decoder
from sklearn.svm import SVC

# import cross validation tool
from sklearn.cross_validation import KFold



In [17]:
# 5-fold cross-validation over the rows of fmri_masked (one row per trial).
# NOTE(review): KFold(n=..., n_folds=...) is the legacy
# sklearn.cross_validation signature; that module was removed in
# scikit-learn 0.20. The modern form is
# sklearn.model_selection.KFold(n_splits=5).split(X) -- confirm the target
# scikit-learn version before migrating, since later cells may rely on
# this object being directly iterable.
# NOTE(review): no shuffling is requested and the printed folds are
# contiguous index blocks. Because the trials are ordered Word (0-23) then
# Picture (24-47), four of the five held-out folds contain a single class;
# consider shuffled or stratified folds.
cv = KFold(n=len(fmri_masked), n_folds=5)

# Print each fold's training indices as a sanity check on the split.
for train, test in cv:
    print(train)

[10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
 35 36 37 38 39 40 41 42 43 44 45 46 47]
[ 0  1  2  3  4  5  6  7  8  9 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
 35 36 37 38 39 40 41 42 43 44 45 46 47]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 30 31 32 33 34
 35 36 37 38 39 40 41 42 43 44 45 46 47]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 39 40 41 42 43 44 45 46 47]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38]


In [13]:
# Select the labels to classify on: one Modality value per trial
# ('Word' or 'Picture'), in the same row order as the trials table.
conditions = trials['Modality']


In [14]:
# Display the full label vector to check its length and ordering.
conditions

0        Word
1        Word
2        Word
3        Word
4        Word
5        Word
6        Word
7        Word
8        Word
9        Word
10       Word
11       Word
12       Word
13       Word
14       Word
15       Word
16       Word
17       Word
18       Word
19       Word
20       Word
21       Word
22       Word
23       Word
24    Picture
25    Picture
26    Picture
27    Picture
28    Picture
29    Picture
30    Picture
31    Picture
32    Picture
33    Picture
34    Picture
35    Picture
36    Picture
37    Picture
38    Picture
39    Picture
40    Picture
41    Picture
42    Picture
43    Picture
44    Picture
45    Picture
46    Picture
47    Picture
Name: Modality, dtype: object