In [1]:
from __future__ import division
import os
from os.path import join
import numpy as np
import pandas as pd

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
# paths to subject dir
# subj is the subject ID; it is used both as the sub-directory name under
# dataDir and as the filename prefix of that subject's files.
subj = '13034'
# root data directory, relative to the notebook's working directory
dataDir = '../data'

## Prepare the attributes
Prepare a set of volume labels that can be used to slice up the dataset for different
analyses with PyMVPA. Each attribute needs exactly one label per volume, but a single volume can
carry several labels at once, one for each attribute (e.g. modality, category, and stimulus).

In [5]:
# load this subject's trial-onset table (tab-delimited text) and preview it
trialOnsets = pd.read_csv(join(dataDir, subj, subj + '_trialOnsets.txt'), sep='\t')
trialOnsets.head()

Unnamed: 0,TrialOnset,Modality,Category,Stimulus
0,2000,Word,dwelling,castle
1,11995,Word,tool,saw
2,22007,Word,dwelling,house
3,32002,Word,dwelling,tent
4,41980,Word,tool,screwdriver


In [68]:
# function to convert trial onsets into volume labels
def makeAttributes(trialOnsets, nVols=500, volsPerTrial=3, msPerVol=1000):
    """
    Make an attributes table that labels every volume in the run according
    to the different ways of sorting these stimuli (e.g. words vs pics, etc...)

    Parameters
    ----------
    trialOnsets : pandas.DataFrame
        One row per trial, with columns 'TrialOnset', 'Modality', 'Category'
        and 'Stimulus'.
    nVols : int, optional
        Number of volumes (timepoints) to label. The returned table always
        has exactly this many rows.
    volsPerTrial : int, optional
        How long each trial lasts, in volumes.
    msPerVol : number, optional
        Divisor that converts a 'TrialOnset' value into a volume index. The
        default of 1000 presumably reflects onsets in milliseconds and
        1-second volumes -- confirm against the acquisition parameters.

    Returns
    -------
    pandas.DataFrame
        Columns 'modality', 'category' and 'stimulus', one row per volume.
        Volumes that do not belong to any trial are labeled 'n'.
    """
    # start every volume with the dummy label; one independent list per attribute
    modLabels = ['n'] * nVols
    catLabels = ['n'] * nVols
    stimLabels = ['n'] * nVols

    # loop over each trial in trialOnsets
    for _, row in trialOnsets.iterrows():
        # convert onset time to nearest vol index (float divisor keeps this
        # true division even without `from __future__ import division`)
        onset = int(np.round(row['TrialOnset'] / float(msPerVol)))

        # Clip the trial window to the run. Slice-assigning past the end of a
        # list silently appends to it, which used to produce more labels than
        # volumes for trials starting near (or after) the end of the run.
        firstVol = max(onset, 0)
        lastVol = min(onset + volsPerTrial, nVols)

        # update the labels for every volume covered by this trial
        for vol in range(firstVol, lastVol):
            modLabels[vol] = row['Modality']
            catLabels[vol] = row['Category']
            stimLabels[vol] = row['Stimulus']

    # convert to a dataframe
    attributes_df = pd.DataFrame({'modality': modLabels,
                                  'category': catLabels,
                                  'stimulus': stimLabels})
    return attributes_df


In [69]:
# create the table of volume labels for this subject
# (makeAttributes returns a DataFrame with one row per volume and the
#  columns 'modality', 'category' and 'stimulus')
attributes = makeAttributes(trialOnsets)

In [70]:
# preview the first few rows of the volume-label table
attributes.head()

Unnamed: 0,category,modality,stimulus
0,n,n,n
1,n,n,n
2,dwelling,Word,castle
3,dwelling,Word,castle
4,dwelling,Word,castle


### Add attribute columns that separate out category and stimulus labels by modality

In [71]:
def catByWord(row):
    """Return the row's category with 'Word' appended for Word-modality rows, else 'n'."""
    isWord = row['modality'] == 'Word'
    return row['category'] + 'Word' if isWord else 'n'

def catByPic(row):
    """Return the row's category with 'Pic' appended for Picture-modality rows, else 'n'."""
    isPicture = row['modality'] == 'Picture'
    return row['category'] + 'Pic' if isPicture else 'n'

def stimByWord(row):
    """Return the row's stimulus with 'Word' appended for Word-modality rows, else 'n'."""
    isWord = row['modality'] == 'Word'
    return row['stimulus'] + 'Word' if isWord else 'n'

def stimByPic(row):
    """Return the row's stimulus with 'Pic' appended for Picture-modality rows, else 'n'."""
    isPicture = row['modality'] == 'Picture'
    return row['stimulus'] + 'Pic' if isPicture else 'n'

In [72]:
# add one modality-specific label column per (column name, row labeler) pair;
# each labeler is applied row-wise (axis=1) to the attributes table
for _colName, _labeler in [('categoryWords', catByWord),
                           ('categoryPics', catByPic),
                           ('stimulusWords', stimByWord),
                           ('stimulusPics', stimByPic)]:
    attributes[_colName] = attributes.apply(_labeler, axis=1)

In [73]:
# preview the table again, now with the modality-specific label columns
attributes.head()

Unnamed: 0,category,modality,stimulus,categoryWords,categoryPics,stimulusWords,stimulusPics
0,n,n,n,n,n,n,n
1,n,n,n,n,n,n,n
2,dwelling,Word,castle,dwellingWord,n,castleWord,n
3,dwelling,Word,castle,dwellingWord,n,castleWord,n
4,dwelling,Word,castle,dwellingWord,n,castleWord,n


## Load the subject's functional data (along with whole brain mask)

In [76]:
from mvpa2.tutorial_suite import *

  from pandas.core import datetools


In [77]:
# directory holding this subject's files
subjDataDir = join(dataDir, subj)

# load the subject's functional image as a PyMVPA dataset, passing the
# brain-mask image as the `mask` argument
ds = fmri_dataset(
    join(subjDataDir, subj + '_TDSL2.nii.gz'),
    mask=join(subjDataDir, 'masks/TDSL2_brain_mask.nii.gz')
)




### Add the attributes to the dataset

In [85]:
# Add all sample attributes.
# There must be exactly one label per volume, so fail early with a clear
# message if the attributes table and the dataset disagree in length.
if len(attributes) != ds.nsamples:
    raise ValueError("attributes has %d rows but the dataset has %d volumes"
                     % (len(attributes), ds.nsamples))

# Hand PyMVPA a plain NumPy string array instead of the pandas Series itself.
# NOTE(review): storing the object-dtype Series directly is the likely cause of
# "TypeError: Cannot change data-type for object array." when the dataset is
# later mapped/copied -- confirm by re-running the detrending step.
for c in attributes.columns:
    ds.sa[c] = attributes[c].values.astype(str)

In [90]:
# There is only a single run, so every volume gets the same dummy chunk label (0)
ds.sa['chunks'] = [0] * ds.nsamples

### Preprocessing

In [95]:
# detrend the data to remove linear drift
# polyord=1 requests a first-order (linear) polynomial
detrender = PolyDetrendMapper(polyord=1)
# NOTE(review): when last run, this call raised
# "TypeError: Cannot change data-type for object array."
# Presumably triggered by the object-dtype sample attributes attached to ds
# from the pandas table -- verify before relying on detrended_ds.
detrended_ds = ds.get_mapped(detrender)

TypeError: Cannot change data-type for object array.

In [99]:
# debugging aid: list the names (attributes and methods) available on the dataset
dir(ds)

['C',
 'O',
 'S',
 'T',
 'UC',
 'UT',
 '__array__',
 '__class__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__dict__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__getitem__',
 '__hash__',
 '__init__',
 '__len__',
 '__module__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__repr_full__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_append_mapper',
 '_collection_id2obj',
 'a',
 'aggregate_features',
 'append',
 'chunks',
 'coarsen_chunks',
 'copy',
 'fa',
 'find_collection',
 'from_channeltimeseries',
 'from_hdf5',
 'from_npz',
 'from_wizard',
 'get_attr',
 'get_mapped',
 'get_nsamples_per_attr',
 'get_samples_by_attr',
 'get_samples_per_chunk_target',
 'idhash',
 'init_origids',
 'item',
 'mapper',
 'nfeatures',
 'nsamples',
 'random_samples',
 'remove_invariant_features',
 'remove_nonfinite_features',
 'sa',
 'samples',
 'save',
 'select',
 'set_attr',
 'shape',
 'summary',
 'summary_targets',
 'targets',
 'to_npz',
 '

In [100]:
# debugging aid: inspect the mapper attached to the dataset
ds.mapper

ChainMapper(nodes=[FlattenMapper(shape=(80, 80, 17), auto_train=True, space='voxel_indices'), StaticFeatureSelection(dshape=(108800,), slicearg=array([False, False, False, ..., False, False, False], dtype=bool))])