brainiak · mihaic · Jan 12, 2017 · Jan 7, 2017 · Jan 7, 2017 · Jan 9, 2017
diff --git a/.gitignore b/.gitignore
@@ -7,6 +7,9 @@
 *.o
 *.npy
 *.log
+*.nii.gz
+*.zip
+*.pkl
 
 brainiak.egg-info
 build
@@ -21,6 +24,9 @@ htmlcov
 .ipynb_checkpoints
 junit-*.xml
 __pycache__
+.DS_Store
+__MACOSX
 
 brainiak/fcma/cython_blas.c
 brainiak/eventseg/_utils.c
+brainiak/examples/fcma/face_scene/
diff --git a/brainiak/fcma/__init__.py b/brainiak/fcma/__init__.py
@@ -11,4 +11,21 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-"""Full correlation matrix analysis"""
+"""Full correlation matrix analysis
+
+The implementation is based on the following publications:
+
+.. [Wang2015-1] Full correlation matrix analysis (FCMA): An unbiased method for
+   task-related functional connectivity",
+   Yida Wang, Jonathan D Cohen, Kai Li, Nicholas B Turk-Browne.
+   Journal of Neuroscience Methods, 2015.
+
+.. [Wang2015-2] "Full correlation matrix analysis of fMRI data on Intel® Xeon
+   Phi™ coprocessors",
+   Yida Wang, Michael J. Anderson, Jonathan D. Cohen, Alexander Heinecke,
+   Kai Li, Nadathur Satish, Narayanan Sundaram, Nicholas B. Turk-Browne,
+   Theodore L. Willke.
+   In Proceedings of the International Conference for
+   High Performance Computing,
+   Networking, Storage and Analysis. 2015.
+"""
diff --git a/brainiak/fcma/classifier.py b/brainiak/fcma/classifier.py
@@ -13,12 +13,7 @@
 #  limitations under the License.
 """Full Correlation Matrix Analysis (FCMA)
 
-This implementation is based on the following publications:
-
-.. [Wang2015] Full correlation matrix analysis (FCMA): An unbiased method for
-   task-related functional connectivity",
-   Yida Wang, Jonathan D Cohen, Kai Li, Nicholas B Turk-Browne.
-   Journal of Neuroscience Methods, 2015.
+Correlation-based training and prediction
 """
 
 # Authors: Yida Wang

diff --git a/brainiak/fcma/mvpa_voxelselector.py b/brainiak/fcma/mvpa_voxelselector.py
@@ -0,0 +1,187 @@
+#  Copyright 2016 Intel Corporation
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+"""Full Correlation Matrix Analysis (FCMA)
+
+Activity-based voxel selection
+"""
+
+# Authors: Yida Wang
+# (Intel Labs), 2017
+
+import numpy as np
+from sklearn import model_selection
+from scipy.stats.mstats import zscore
+import logging
+from mpi4py import MPI
+
+logger = logging.getLogger(__name__)
+
+__all__ = [
+    "MVPAVoxelSelector",
+]
+
+
+class MVPAVoxelSelector:
+    """Activity-based voxel selection component of FCMA
+
+    Parameters
+    ----------
+
+    raw\_data: list of 4D array
+        each element of the list is the raw data of a subject,
+        in the shape of [x, y, z, t]
+
+    mask: 3D array
+
+    epoch\_info: list of tuple (label, sid, start, end).
+        label is the condition labels of the epochs;
+        sid is the subject id, corresponding to the index of raw_data;
+        start is the start TR of an epoch (inclusive);
+        end is the end TR of an epoch(exclusive).
+        Assuming len(labels) labels equals the number of epochs and
+        the epochs of the same sid are adjacent in epoch_info
+
+    num\_folds: int
+        the number of folds to be conducted in the cross validation
+
+    sl: Searchlight
+        the distributed Searchlight object
+
+    processed\_data\_: 4D array in shape [brain 3D + epoch]
+        contains the averaged and normalized brain data epoch by epoch
+        it is generated from raw\_data and epoch\_info
+
+    labels\_: 1D array
+        contains the labels of the epochs, extracted from epoch\_info
+    """
+    def __init__(self,
+                 raw_data,
+                 mask,
+                 epoch_info,
+                 num_folds,
+                 sl
+                 ):
+        self.raw_data = raw_data
+        self.mask = mask.astype(np.bool)
+        self.epoch_info = epoch_info
+        self.num_folds = num_folds
+        self.sl = sl
+        self.processed_data_ = None
+        self.labels_ = None
+        num_voxels = np.sum(self.mask)
+        if num_voxels == 0:
+            raise ValueError('Zero processed voxels')
+
+    def _preprocess_data(self):
+        """ process the raw data according to epoch info
+
+        This is done in rank 0 which has the raw_data read in
+        Average the activity within epochs and z-scoring within subject.
+        Write the results to self.processed_data,
+        which is a 4D array of averaged epoch by epoch processed data
+        Also write the labels to self.label as a 1D numpy array
+        """
+        logger.info(
+            'mask size: %d' %
+            np.sum(self.mask)
+        )
+        num_epochs = len(self.epoch_info)
+        (d1, d2, d3, _) = self.raw_data[0].shape
+        self.processed_data_ = np.empty([d1, d2, d3, num_epochs])
+        self.labels_ = np.empty(num_epochs)
+        subject_count = [0]  # counting the epochs per subject for z-scoring
+        cur_sid = -1
+        # averaging
+        for idx, epoch in enumerate(self.epoch_info):
+            self.labels_[idx] = epoch[0]
+            if cur_sid != epoch[1]:
+                subject_count.append(0)
+                cur_sid = epoch[1]
+            subject_count[-1] += 1
+            self.processed_data_[:, :, :, idx] = \
+                np.mean(self.raw_data[cur_sid][:, :, :, epoch[2]:epoch[3]],
+                        axis=3)
+        # z-scoring
+        cur_epoch = 0
+        for i in subject_count:
+            if i > 1:
+                self.processed_data_[:, :, :, cur_epoch:cur_epoch + i] = \
+                    zscore(self.processed_data_[:, :, :,
+                           cur_epoch:cur_epoch + i],
+                           axis=3, ddof=0)
+            cur_epoch += i
+        # if zscore fails (standard deviation is zero),
+        # set all values to be zero
+        self.processed_data_ = np.nan_to_num(self.processed_data_)
+
+    def run(self, clf):
+        """ run activity-based voxel selection
+
+        Sort the voxels based on the cross-validation accuracy
+        of their activity vectors within the searchlight
+
+        Parameters
+        ----------
+        clf: classification function
+            the classifier to be used in cross validation
+
+        Returns
+        -------
+        result_volume: 3D array of accuracy numbers
+            contains the voxelwise accuracy numbers obtained via Searchlight
+        results: list of tuple (voxel_id, accuracy)
+            the accuracy numbers of all voxels, in accuracy descending order
+            the length of array equals the number of voxels
+        """
+        rank = MPI.COMM_WORLD.Get_rank()
+        if rank == 0:
+            logger.info(
+                'running activity-based voxel selection via Searchlight'
+            )
+        if rank == 0:
+            self._preprocess_data()
+        self.sl.distribute([self.processed_data_], self.mask)
+        self.sl.broadcast((self.labels_, self.num_folds))
+        if rank == 0:
+            logger.info(
+                'data preparation done'
+            )
+
+        # Searchlight kernel function
+        def _sfn(l, mask, myrad, bcast_var):
+            data = l[0][mask, :].T
+            # print(l[0].shape, mask.shape, data.shape)
+            skf = model_selection.StratifiedKFold(n_splits=bcast_var[1],
+                                                  shuffle=False)
+            accuracy = np.mean(model_selection.cross_val_score(clf, data,
+                                                               y=bcast_var[0],
+                                                               cv=skf,
+                                                               n_jobs=1))
+            return accuracy
+        # obtain a 3D array with accuracy numbers
+        result_volume = self.sl.run_searchlight(_sfn)
+        # get result tuple list from the volume
+        result_list = result_volume[self.mask]
+        results = []
+        if rank == 0:
+            for idx, value in enumerate(result_list):
+                if value is None:
+                    value = 0
+                results.append((idx, value))
+            # Sort the voxels
+            results.sort(key=lambda tup: tup[1], reverse=True)
+            logger.info(
+                'activity-based voxel selection via Searchlight is done'
+            )
+        return result_volume, results
diff --git a/brainiak/fcma/voxelselector.py b/brainiak/fcma/voxelselector.py
@@ -13,21 +13,7 @@
 #  limitations under the License.
 """Full Correlation Matrix Analysis (FCMA)
 
-This implementation is based on the following publications:
-
-.. [Wang2015-1] Full correlation matrix analysis (FCMA): An unbiased method for
-   task-related functional connectivity",
-   Yida Wang, Jonathan D Cohen, Kai Li, Nicholas B Turk-Browne.
-   Journal of Neuroscience Methods, 2015.
-
-.. [Wang2015-2] "Full correlation matrix analysis of fMRI data on Intel® Xeon
-   Phi™ coprocessors",
-   Yida Wang, Michael J. Anderson, Jonathan D. Cohen, Alexander Heinecke,
-   Kai Li, Nadathur Satish, Narayanan Sundaram, Nicholas B. Turk-Browne,
-   Theodore L. Willke.
-   In Proceedings of the International Conference for
-   High Performance Computing,
-   Networking, Storage and Analysis. 2015.
+Correlation-based voxel selection
 """
 
 # Authors: Yida Wang

diff --git a/examples/fcma/download_data.sh b/examples/fcma/download_data.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+curl --location -o face_scene.zip https://www.dropbox.com/s/11adrjdkt0w1tr3/face_scene.zip?dl=0
+unzip -qo face_scene.zip
+rm -f face_scene.zip
diff --git a/examples/fcma/file_io.py b/examples/fcma/file_io.py
@@ -13,6 +13,7 @@
 #  limitations under the License.
 
 import nibabel as nib
+from nibabel.nifti1 import Nifti1Pair
 import os
 import math
 import time
@@ -126,7 +127,6 @@ def separate_epochs(activity_data, epoch_list):
     )
     return raw_data, labels
 
-
 def prepare_data(data_dir, extension, mask_file, epoch_file):
     """ read the data in and generate epochs of interests,
     then broadcast to all workers
@@ -178,3 +178,57 @@ def prepare_data(data_dir, extension, mask_file, epoch_file):
             (time2 - time1)
         )
     return raw_data, labels
+
+def generate_epochs_info(epoch_list):
+    """ use epoch_list to generate epoch_info defined below
+
+    Parameters
+    ----------
+    epoch\_list: list of 3D (binary) array in shape [condition, nEpochs, nTRs]
+        Contains specification of epochs and conditions,
+        Assumption: 1. all subjects have the same number of epochs;
+                     2. len(epoch_list) equals the number of subjects;
+                     3. an epoch is always a continuous time course.
+
+    Returns
+    -------
+    epoch\_info: list of tuple (label, sid, start, end).
+        label is the condition labels of the epochs;
+        sid is the subject id, corresponding to the index of raw_data;
+        start is the start TR of an epoch (inclusive);
+        end is the end TR of an epoch(exclusive).
+        Assuming len(labels) labels equals the number of epochs and
+        the epochs of the same sid are adjacent in epoch_info
+    """
+    time1 = time.time()
+    epoch_info = []
+    for sid, epoch in enumerate(epoch_list):
+        for cond in range(epoch.shape[0]):
+            sub_epoch = epoch[cond, :, :]
+            for eid in range(epoch.shape[1]):
+                r = np.sum(sub_epoch[eid, :])
+                if r > 0:   # there is an epoch in this condition
+                    start = np.nonzero(sub_epoch[eid, :])[0][0]
+                    epoch_info.append((cond, sid, start, start+r))
+    time2 = time.time()
+    logger.debug(
+        'epoch separation done, takes %.2f s' %
+        (time2 - time1)
+    )
+    return epoch_info
+
+
+def write_nifti_file(data, affine, filename):
+    """ write a nifti file given data and affine
+
+    Parameters
+    ----------
+    data: 3D/4D numpy array
+        the brain data with/without time dimension
+    affine: 2D numpy array
+        affine of the image, usually inherited from an existing image
+    filename: string
+        the output filename
+    """
+    img = Nifti1Pair(data, affine)
+    nib.nifti1.save(img, filename)