From 81972f79261c563f9260d5045755ea80f0c002b3 Mon Sep 17 00:00:00 2001
From: Yida Wang <yidawa@gmail.com>
Date: Wed, 26 Apr 2017 22:26:50 -0700
Subject: [PATCH 1/8] add random permutation to FCMA

---
 brainiak/fcma/preprocessing.py        | 110 ++++++++++++++++++++++++--
 brainiak/fcma/voxelselector.py        |  16 +++-
 brainiak/io.py                        |  11 +++
 examples/fcma/mvpa_voxel_selection.py |  15 +++-
 examples/fcma/voxel_selection.py      |  16 +++-
 tests/fcma/test_preprocessing.py      |  36 ++++++++-
 6 files changed, 189 insertions(+), 15 deletions(-)

diff --git a/brainiak/fcma/preprocessing.py b/brainiak/fcma/preprocessing.py
index 5a620237d..4872c7739 100644
--- a/brainiak/fcma/preprocessing.py
+++ b/brainiak/fcma/preprocessing.py
@@ -22,12 +22,21 @@
 import logging
 from scipy.stats.mstats import zscore
 from mpi4py import MPI
+from enum import Enum
 
 from ..image import mask_images, multimask_images
 
 
 logger = logging.getLogger(__name__)
 
+__all__ = [
+    "RandomType",
+    "prepare_fcma_data",
+    "generate_epochs_info",
+    "prepare_mvpa_data",
+    "prepare_searchlight_mvpa_data",
+]
+
 
 def _separate_epochs(activity_data, epoch_list):
     """ create data epoch by epoch
@@ -37,9 +46,9 @@ def _separate_epochs(activity_data, epoch_list):
 
     Parameters
     ----------
-    activity\_data: list of 2D array in shape [nVoxels, nTRs]
+    activity_data: list of 2D array in shape [nVoxels, nTRs]
         the masked activity data organized in voxel*TR formats of all subjects
-    epoch\_list: list of 3D array in shape [condition, nEpochs, nTRs]
+    epoch_list: list of 3D array in shape [condition, nEpochs, nTRs]
         specification of epochs and conditions
         assuming all subjects have the same number of epochs
         len(epoch_list) equals the number of subjects
@@ -83,8 +92,72 @@ def _separate_epochs(activity_data, epoch_list):
     return raw_data, labels
 
 
+def _randomize_single_subject(data, seed=None):
+    """Randomly permute the voxels of the subject.
+
+     The subject is organized as Voxel x TR,
+     this method shuffles the voxel dimension.
+
+    Parameters
+    ----------
+    data: 2D array in shape [nVxels, nTRs]
+        Activity data.
+    seed: Optional[int]
+        Seed for random state used implicitly for shuffling.
+
+    Returns
+    data: 2D array in shape [nVxels, nTRs]
+        Activity data with the voxel dimension shuffled.
+    """
+    if seed is not None:
+        np.random.seed(seed)
+    np.random.shuffle(data)
+    return data
+
+
+def _randomize_subject_list(data_list, random):
+    """Randomly permute the voxels of a subject list.
+
+     The method shuffles the subject one by one according to
+     the random type. If RandomType.NORANDOM, return the
+     original list.
+
+    Parameters
+    ----------
+    data_list: list of 2D array in shape [nVxels, nTRs]
+        Activity data list.
+    random: RandomType
+        Randomization type.
+
+    Returns
+    data_list: list of 2D array in shape [nVxels, nTRs]
+        (Randomized) activity data list.
+    """
+    if random == RandomType.REPRODUCIBLE:
+        data_list = [_randomize_single_subject(data, seed=idx)
+                     for idx, data in enumerate(data_list)]
+    elif random == RandomType.UNREPRODUCIBLE:
+        data_list = [_randomize_single_subject(data)
+                     for data in data_list]
+    return data_list
+
+
+class RandomType(Enum):
+    """Define the random types as enumeration
+
+    NORANDOM means do not randomize the data;
+    REPRODUCIBLE means randomize the data with a fixed seed so that the
+    permutation holds between different runs;
+    UNREPRODUCIBLE means truly randomize the data which returns different
+    results in different runs.
+    """
+    NORANDOM = 0
+    REPRODUCIBLE = 1
+    UNREPRODUCIBLE = 2
+
+
 def prepare_fcma_data(images, conditions, mask1, mask2=None,
-                      comm=MPI.COMM_WORLD):
+                      random=RandomType.NORANDOM, comm=MPI.COMM_WORLD):
     """Prepare data for correlation-based computation and analysis.
 
     Generate epochs of interests, then broadcast to all workers.
@@ -102,6 +175,9 @@ def prepare_fcma_data(images, conditions, mask1, mask2=None,
         If it is not specified, the method will assign None to the returning
         variable raw_data2 and the self-correlation on raw_data1 will be
         computed
+    random: Optional[RandomType]
+        Randomize the data within subject or not.
+        Default NORANDOM
     comm: MPI.Comm
         MPI communicator to use for MPI operations.
 
@@ -127,9 +203,11 @@ def prepare_fcma_data(images, conditions, mask1, mask2=None,
             activity_data1, activity_data2 = zip(*multimask_images(images,
                                                                    masks,
                                                                    np.float32))
+            _randomize_subject_list(activity_data2, random)
             raw_data2, _ = _separate_epochs(activity_data2, conditions)
         else:
             activity_data1 = list(mask_images(images, mask1, np.float32))
+        _randomize_subject_list(activity_data1, random)
         raw_data1, labels = _separate_epochs(activity_data1, conditions)
         time1 = time.time()
     raw_data_length = len(raw_data1)
@@ -170,7 +248,7 @@ def generate_epochs_info(epoch_list):
 
     Returns
     -------
-    epoch\_info: list of tuple (label, sid, start, end).
+    epoch_info: list of tuple (label, sid, start, end).
         label is the condition labels of the epochs;
         sid is the subject id, corresponding to the index of raw_data;
         start is the start TR of an epoch (inclusive);
@@ -250,7 +328,8 @@ def prepare_mvpa_data(images, conditions, mask):
     return processed_data, labels
 
 
-def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32):
+def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32,
+                                  random=RandomType.NORANDOM):
     """ obtain the data for activity-based voxel selection using Searchlight
 
     Average the activity within epochs and z-scoring within subject,
@@ -266,10 +345,13 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32):
         Condition specification.
     data_type
         Type to cast image to.
+    random: Optional[RandomType]
+        Randomize the data within subject or not.
+        Default NORANDOM
 
     Returns
     -------
-    processed\_data: 4D array in shape [brain 3D + epoch]
+    processed_data: 4D array in shape [brain 3D + epoch]
         averaged epoch by epoch processed data
 
     labels: 1D array
@@ -292,8 +374,20 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32):
 
     for sid, f in enumerate(images):
         data = f.get_data().astype(data_type)
+        [d1, d2, d3, d4] = data.shape
+        if random == RandomType.REPRODUCIBLE:
+            data = _randomize_single_subject(data.reshape((d1 * d2 * d3, d4)),
+                                             seed=sid).reshape((d1,
+                                                                d2,
+                                                                d3,
+                                                                d4))
+        elif random == RandomType.UNREPRODUCIBLE:
+            data = _randomize_single_subject(
+                data.reshape((d1 * d2 * d3, d4))).reshape((d1,
+                                                           d2,
+                                                           d3,
+                                                           d4))
         if processed_data is None:
-            [d1, d2, d3, _] = data.shape
             processed_data = np.empty([d1, d2, d3, num_epochs],
                                       dtype=data_type)
         # averaging
@@ -303,7 +397,7 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32):
                 processed_data[:, :, :, idx] = \
                     np.mean(data[:, :, :, epoch[2]:epoch[3]], axis=3)
 
-        logger.info(
+        logger.debug(
             'file %s is loaded and processed, with data shape %s' %
             (f.get_filename(), data.shape)
         )
diff --git a/brainiak/fcma/voxelselector.py b/brainiak/fcma/voxelselector.py
index 78ea330a6..ea3331515 100644
--- a/brainiak/fcma/voxelselector.py
+++ b/brainiak/fcma/voxelselector.py
@@ -163,7 +163,8 @@ def _master(self):
             the length of array equals the number of voxels
         """
         logger.info(
-            'Master starts to allocate tasks'
+            'Master at rank %d starts to allocate tasks' %
+            MPI.COMM_WORLD.Get_rank()
         )
         results = []
         comm = MPI.COMM_WORLD
@@ -181,6 +182,10 @@ def _master(self):
             if current_task[1] == 0:
                 using_size = i
                 break
+            logger.debug(
+                'master starts to send a task to worker %d',
+                i
+            )
             comm.send(current_task,
                       dest=i,
                       tag=self._WORKTAG)
@@ -235,6 +240,10 @@ def _worker(self, clf):
         -------
         None
         """
+        logger.debug(
+            'worker %d is running, waiting for tasks from master at rank %d',
+            MPI.COMM_WORLD.Get_rank(), self.master_rank
+        )
         comm = MPI.COMM_WORLD
         status = MPI.Status()
         while 1:
@@ -264,6 +273,11 @@ def _correlation_computation(self, task):
         time1 = time.time()
         s = task[0]
         nEpochs = len(self.raw_data)
+        logger.debug(
+            'start to compute the correlation: #epochs: %d, '
+            '#processed voxels: %d, #total voxels to compute against: %d',
+            nEpochs, task[1], self.num_voxels2
+        )
         corr = np.zeros((task[1], nEpochs, self.num_voxels2),
                         np.float32, order='C')
         count = 0
diff --git a/brainiak/io.py b/brainiak/io.py
index 62937bdae..715a6b78f 100644
--- a/brainiak/io.py
+++ b/brainiak/io.py
@@ -25,12 +25,15 @@
 
 import nibabel as nib
 import numpy as np
+import logging
 
 from nibabel.nifti1 import Nifti1Pair
 from nibabel.spatialimages import SpatialImage
 
 from .image import SingleConditionSpec
 
+logger = logging.getLogger(__name__)
+
 
 def load_images_from_dir(in_dir: Union[str, Path], suffix: str = "nii.gz",
                          ) -> Iterable[SpatialImage]:
@@ -59,6 +62,10 @@ def load_images_from_dir(in_dir: Union[str, Path], suffix: str = "nii.gz",
     files = sorted(in_dir.glob("*" + suffix))
     for f in files:
         yield nib.load(str(f))
+        logger.info(
+            'file %s is read' %
+            str(f)
+        )
 
 
 def load_images(image_paths: Iterable[Union[str, Path]]
@@ -86,6 +93,10 @@ def load_images(image_paths: Iterable[Union[str, Path]]
             string_path = str(image_path)
         else:
             string_path = image_path
+        logger.info(
+            'starts to read file %s' %
+            string_path
+        )
         yield nib.load(string_path)
 
 
diff --git a/examples/fcma/mvpa_voxel_selection.py b/examples/fcma/mvpa_voxel_selection.py
index 1892d5e80..2327579e3 100644
--- a/examples/fcma/mvpa_voxel_selection.py
+++ b/examples/fcma/mvpa_voxel_selection.py
@@ -53,11 +53,24 @@
             'mask size: %d' %
             np.sum(mask)
         )
-        images = io.load_images_from_dir(data_dir, extension)
+        images = io.load_images_from_dir(data_dir, suffix=extension)
         conditions = io.load_labels(epoch_file)
         data, labels = prepare_searchlight_mvpa_data(images, conditions)
+
+        # setting the random argument produces random voxel selection results
+        # for non-parametric statistical analysis.
+        # There are three random options:
+        # RandomType.NORANDOM is the default
+        # RandomType.REPRODUCIBLE permutes the voxels in the same way every run
+        # RandomType.UNREPRODUCIBLE permutes the voxels differently across runs
+        # example (replaces the call above; images is a generator):
+        #from brainiak.fcma.preprocessing import RandomType
+        #data, labels = prepare_searchlight_mvpa_data(
+        #    images, conditions, random=RandomType.UNREPRODUCIBLE)
+
         # the following line is an example to leaving a subject out
         #epoch_info = [x for x in epoch_info if x[1] != 0]
+
     num_subjs = int(sys.argv[5])
     # create a Searchlight object
     sl = Searchlight(sl_rad=1)
diff --git a/examples/fcma/voxel_selection.py b/examples/fcma/voxel_selection.py
index 27c570181..1d03e050b 100644
--- a/examples/fcma/voxel_selection.py
+++ b/examples/fcma/voxel_selection.py
@@ -41,16 +41,30 @@
     extension = sys.argv[2]
     mask_file = sys.argv[3]
     epoch_file = sys.argv[4]
-    images = io.load_images_from_dir(data_dir, extension)
+    images = io.load_images_from_dir(data_dir, suffix=extension)
     mask = io.load_boolean_mask(mask_file)
     conditions = io.load_labels(epoch_file)
     raw_data, _, labels = prepare_fcma_data(images, conditions, mask)
+
+    # setting the random argument produces random voxel selection results
+    # for non-parametric statistical analysis.
+    # There are three random options:
+    # RandomType.NORANDOM is the default
+    # RandomType.REPRODUCIBLE permutes the voxels in the same way every run
+    # RandomType.UNREPRODUCIBLE permutes the voxels differently across runs
+    # example (replaces the call above; images is a generator):
+    # from brainiak.fcma.preprocessing import RandomType
+    # raw_data, _, labels = prepare_fcma_data(images, conditions, mask,
+    #                                         random=RandomType.REPRODUCIBLE)
+
     # if providing two masks, just append the second mask as the last input argument
     # and specify raw_data2
+    # example:
     # images = io.load_images_from_dir(data_dir, extension)
     # mask2 = io.load_boolean_mask('face_scene/mask.nii.gz')
     # raw_data, raw_data2, labels = prepare_fcma_data(images, conditions, mask,
     #                                                 mask2)
+
     epochs_per_subj = int(sys.argv[5])
     num_subjs = int(sys.argv[6])
     # the following line is an example to leaving a subject out
diff --git a/tests/fcma/test_preprocessing.py b/tests/fcma/test_preprocessing.py
index 554b95e34..908149610 100644
--- a/tests/fcma/test_preprocessing.py
+++ b/tests/fcma/test_preprocessing.py
@@ -29,7 +29,7 @@
 
 
 def test_prepare_fcma_data():
-    images = io.load_images_from_dir(data_dir, suffix)
+    images = io.load_images_from_dir(data_dir, suffix=suffix)
     mask = io.load_boolean_mask(mask_file)
     conditions = io.load_labels(epoch_file)
     raw_data, _, labels = prepare_fcma_data(images, conditions, mask)
@@ -41,9 +41,22 @@ def test_prepare_fcma_data():
             'raw data do not match in test_prepare_fcma_data'
     assert np.array_equal(labels, expected_labels), \
         'the labels do not match in test_prepare_fcma_data'
+    from brainiak.fcma.preprocessing import RandomType
+    images = io.load_images_from_dir(data_dir, suffix=suffix)
+    random_raw_data, _, _ = prepare_fcma_data(images, conditions, mask,
+                                                   random=
+                                                   RandomType.REPRODUCIBLE)
+    assert len(random_raw_data) == len(expected_raw_data), \
+        'numbers of epochs do not match in test_prepare_fcma_data'
+    images = io.load_images_from_dir(data_dir, suffix=suffix)
+    random_raw_data, _, _ = prepare_fcma_data(images, conditions, mask,
+                                                   random=
+                                                   RandomType.UNREPRODUCIBLE)
+    assert len(random_raw_data) == len(expected_raw_data), \
+        'numbers of epochs do not match in test_prepare_fcma_data'
 
 def test_prepare_mvpa_data():
-    images = io.load_images_from_dir(data_dir, suffix)
+    images = io.load_images_from_dir(data_dir, suffix=suffix)
     mask = io.load_boolean_mask(mask_file)
     conditions = io.load_labels(epoch_file)
     processed_data, labels = prepare_mvpa_data(images, conditions, mask)
@@ -58,10 +71,10 @@ def test_prepare_mvpa_data():
         'the labels do not match in test_prepare_mvpa_data'
 
 def test_prepare_searchlight_mvpa_data():
-    images = io.load_images_from_dir(data_dir, suffix)
+    images = io.load_images_from_dir(data_dir, suffix=suffix)
     conditions = io.load_labels(epoch_file)
     processed_data, labels = prepare_searchlight_mvpa_data(images,
-                                                              conditions)
+                                                           conditions)
     expected_searchlight_processed_data = np.load(
         expected_dir / 'expected_searchlight_processed_data.npy')
     for idx in range(len(processed_data)):
@@ -69,6 +82,21 @@ def test_prepare_searchlight_mvpa_data():
             'raw data do not match in test_prepare_searchlight_mvpa_data'
     assert np.array_equal(labels, expected_labels), \
         'the labels do not match in test_prepare_searchlight_mvpa_data'
+    from brainiak.fcma.preprocessing import RandomType
+    images = io.load_images_from_dir(data_dir, suffix=suffix)
+    random_processed_data, _ = prepare_searchlight_mvpa_data(images,
+                                                             conditions,
+                                                             random=
+                                                             RandomType.REPRODUCIBLE)
+    assert len(random_processed_data) == len(expected_searchlight_processed_data), \
+        'numbers of epochs do not match in test_prepare_searchlight_mvpa_data'
+    images = io.load_images_from_dir(data_dir, suffix=suffix)
+    random_processed_data, _ = prepare_searchlight_mvpa_data(images,
+                                                             conditions,
+                                                             random=
+                                                             RandomType.UNREPRODUCIBLE)
+    assert len(random_processed_data) == len(expected_searchlight_processed_data), \
+        'numbers of epochs do not match in test_prepare_searchlight_mvpa_data'
 
 if __name__ == '__main__':
     test_prepare_fcma_data()

From 507bbf323208d45240e1c8dccf561e3df207d76a Mon Sep 17 00:00:00 2001
From: Yida Wang <yidawa@gmail.com>
Date: Thu, 27 Apr 2017 10:00:00 -0700
Subject: [PATCH 2/8] addressing review comments from Bryn

---
 brainiak/fcma/preprocessing.py | 20 ++++++++++----------
 brainiak/fcma/voxelselector.py |  2 +-
 brainiak/io.py                 |  6 +++---
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/brainiak/fcma/preprocessing.py b/brainiak/fcma/preprocessing.py
index 4872c7739..f5e761d12 100644
--- a/brainiak/fcma/preprocessing.py
+++ b/brainiak/fcma/preprocessing.py
@@ -100,13 +100,13 @@ def _randomize_single_subject(data, seed=None):
 
     Parameters
     ----------
-    data: 2D array in shape [nVxels, nTRs]
+    data: 2D array in shape [nVoxels, nTRs]
         Activity data.
     seed: Optional[int]
         Seed for random state used implicitly for shuffling.
 
     Returns
-    data: 2D array in shape [nVxels, nTRs]
+    data: 2D array in shape [nVoxels, nTRs]
         Activity data with the voxel dimension shuffled.
     """
     if seed is not None:
@@ -134,11 +134,11 @@ def _randomize_subject_list(data_list, random):
         (Randomized) activity data list.
     """
     if random == RandomType.REPRODUCIBLE:
-        data_list = [_randomize_single_subject(data, seed=idx)
-                     for idx, data in enumerate(data_list)]
+        for i in range(len(data_list)):
+            data_list[i] = _randomize_single_subject(data_list[i], seed=i)
     elif random == RandomType.UNREPRODUCIBLE:
-        data_list = [_randomize_single_subject(data)
-                     for data in data_list]
+        for i in range(len(data_list)):
+            data_list[i] = _randomize_single_subject(data_list[i])
     return data_list
 
 
@@ -203,11 +203,11 @@ def prepare_fcma_data(images, conditions, mask1, mask2=None,
             activity_data1, activity_data2 = zip(*multimask_images(images,
                                                                    masks,
                                                                    np.float32))
-            _randomize_subject_list(activity_data2, random)
+            activity_data2 = _randomize_subject_list(activity_data2, random)
             raw_data2, _ = _separate_epochs(activity_data2, conditions)
         else:
             activity_data1 = list(mask_images(images, mask1, np.float32))
-        _randomize_subject_list(activity_data1, random)
+        activity_data1 = _randomize_subject_list(activity_data1, random)
         raw_data1, labels = _separate_epochs(activity_data1, conditions)
         time1 = time.time()
     raw_data_length = len(raw_data1)
@@ -398,8 +398,8 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32,
                     np.mean(data[:, :, :, epoch[2]:epoch[3]], axis=3)
 
         logger.debug(
-            'file %s is loaded and processed, with data shape %s' %
-            (f.get_filename(), data.shape)
+            'file %s is loaded and processed, with data shape %s',
+            f.get_filename(), data.shape
         )
     # z-scoring
     cur_epoch = 0
diff --git a/brainiak/fcma/voxelselector.py b/brainiak/fcma/voxelselector.py
index ea3331515..9cfd0693a 100644
--- a/brainiak/fcma/voxelselector.py
+++ b/brainiak/fcma/voxelselector.py
@@ -163,7 +163,7 @@ def _master(self):
             the length of array equals the number of voxels
         """
         logger.info(
-            'Master at rank %d starts to allocate tasks' %
+            'Master at rank %d starts to allocate tasks',
             MPI.COMM_WORLD.Get_rank()
         )
         results = []
diff --git a/brainiak/io.py b/brainiak/io.py
index 715a6b78f..070119ae7 100644
--- a/brainiak/io.py
+++ b/brainiak/io.py
@@ -63,8 +63,8 @@ def load_images_from_dir(in_dir: Union[str, Path], suffix: str = "nii.gz",
     for f in files:
         yield nib.load(str(f))
         logger.info(
-            'file %s is read' %
-            str(f)
+            'file %s is read',
+            f
         )
 
 
@@ -141,7 +141,7 @@ def load_labels(path: Union[str, Path]) -> List[SingleConditionSpec]:
     List[SingleConditionSpec]
         List of SingleConditionSpec stored in labels file.
     """
-    condition_specs = np.load(path)
+    condition_specs = np.load(str(path))
     return [c.view(SingleConditionSpec) for c in condition_specs]
 
 

From 5f04e95705e01df12457988b1c22af97dba1b5ec Mon Sep 17 00:00:00 2001
From: Yida Wang <yidawa@gmail.com>
Date: Thu, 27 Apr 2017 11:21:56 -0700
Subject: [PATCH 3/8] addressing review comments from Mihai

---
 brainiak/fcma/preprocessing.py |  3 ++-
 brainiak/io.py                 | 14 ++++++--------
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/brainiak/fcma/preprocessing.py b/brainiak/fcma/preprocessing.py
index f5e761d12..ddf4133c1 100644
--- a/brainiak/fcma/preprocessing.py
+++ b/brainiak/fcma/preprocessing.py
@@ -198,6 +198,7 @@ def prepare_fcma_data(images, conditions, mask1, mask2=None,
     raw_data1 = []
     raw_data2 = []
     if rank == 0:
+        logger.info('start to apply masks and separate epochs')
         if mask2 is not None:
             masks = (mask1, mask2)
             activity_data1, activity_data2 = zip(*multimask_images(images,
@@ -347,7 +348,6 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32,
         Type to cast image to.
     random: Optional[RandomType]
         Randomize the data within subject or not.
-        Default NORANDOM
 
     Returns
     -------
@@ -372,6 +372,7 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32,
     # counting the epochs per subject for z-scoring
     subject_count = np.zeros(len(conditions), dtype=np.int32)
 
+    logger.info('start to apply masks and separate epochs')
     for sid, f in enumerate(images):
         data = f.get_data().astype(data_type)
         [d1, d2, d3, d4] = data.shape
diff --git a/brainiak/io.py b/brainiak/io.py
index 070119ae7..b4e71bd25 100644
--- a/brainiak/io.py
+++ b/brainiak/io.py
@@ -23,9 +23,9 @@
 from pathlib import Path
 from typing import Callable, Iterable, List, Union
 
+import logging
 import nibabel as nib
 import numpy as np
-import logging
 
 from nibabel.nifti1 import Nifti1Pair
 from nibabel.spatialimages import SpatialImage
@@ -61,11 +61,10 @@ def load_images_from_dir(in_dir: Union[str, Path], suffix: str = "nii.gz",
         in_dir = Path(in_dir)
     files = sorted(in_dir.glob("*" + suffix))
     for f in files:
-        yield nib.load(str(f))
-        logger.info(
-            'file %s is read',
-            f
+        logger.debug(
+            'starts to read file %s', f
         )
+        yield nib.load(str(f))
 
 
 def load_images(image_paths: Iterable[Union[str, Path]]
@@ -93,9 +92,8 @@ def load_images(image_paths: Iterable[Union[str, Path]]
             string_path = str(image_path)
         else:
             string_path = image_path
-        logger.info(
-            'starts to read file %s' %
-            string_path
+        logger.debug(
+            'starts to read file %s', string_path
         )
         yield nib.load(string_path)
 

From cec34452d089c12533da4d5e85b775b141958573 Mon Sep 17 00:00:00 2001
From: Yida Wang <yidawa@gmail.com>
Date: Thu, 27 Apr 2017 11:23:31 -0700
Subject: [PATCH 4/8] further changes according to the comments

---
 examples/fcma/mvpa_voxel_selection.py | 4 ++--
 examples/fcma/voxel_selection.py      | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/fcma/mvpa_voxel_selection.py b/examples/fcma/mvpa_voxel_selection.py
index 2327579e3..807d1f520 100644
--- a/examples/fcma/mvpa_voxel_selection.py
+++ b/examples/fcma/mvpa_voxel_selection.py
@@ -39,7 +39,7 @@
             MPI.COMM_WORLD.Get_size()
         )
     data_dir = sys.argv[1]
-    extension = sys.argv[2]
+    suffix = sys.argv[2]
     mask_file = sys.argv[3]
     epoch_file = sys.argv[4]
 
@@ -53,7 +53,7 @@
             'mask size: %d' %
             np.sum(mask)
         )
-        images = io.load_images_from_dir(data_dir, suffix=extension)
+        images = io.load_images_from_dir(data_dir, suffix=suffix)
         conditions = io.load_labels(epoch_file)
         data, labels = prepare_searchlight_mvpa_data(images, conditions)
 
diff --git a/examples/fcma/voxel_selection.py b/examples/fcma/voxel_selection.py
index 1d03e050b..a1efd6c8f 100644
--- a/examples/fcma/voxel_selection.py
+++ b/examples/fcma/voxel_selection.py
@@ -38,10 +38,10 @@
             MPI.COMM_WORLD.Get_size()
         )
     data_dir = sys.argv[1]
-    extension = sys.argv[2]
+    suffix = sys.argv[2]
     mask_file = sys.argv[3]
     epoch_file = sys.argv[4]
-    images = io.load_images_from_dir(data_dir, suffix=extension)
+    images = io.load_images_from_dir(data_dir, suffix=suffix)
     mask = io.load_boolean_mask(mask_file)
     conditions = io.load_labels(epoch_file)
     raw_data, _, labels = prepare_fcma_data(images, conditions, mask)

From ec340ecc267f14263768c7dd1fd71e99f7a1857a Mon Sep 17 00:00:00 2001
From: Yida Wang <yidawa@gmail.com>
Date: Thu, 27 Apr 2017 13:10:57 -0700
Subject: [PATCH 5/8] addressing review comments

---
 brainiak/io.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/brainiak/io.py b/brainiak/io.py
index b4e71bd25..50e32f12a 100644
--- a/brainiak/io.py
+++ b/brainiak/io.py
@@ -62,7 +62,7 @@ def load_images_from_dir(in_dir: Union[str, Path], suffix: str = "nii.gz",
     files = sorted(in_dir.glob("*" + suffix))
     for f in files:
         logger.debug(
-            'starts to read file %s', f
+            'Starting to read file %s', f
         )
         yield nib.load(str(f))
 
@@ -93,7 +93,7 @@ def load_images(image_paths: Iterable[Union[str, Path]]
         else:
             string_path = image_path
         logger.debug(
-            'starts to read file %s', string_path
+            'Starting to read file %s', string_path
         )
         yield nib.load(string_path)
 

From 11383d17e26cc60e4e64c9b5ebdc98c1483bdda4 Mon Sep 17 00:00:00 2001
From: Yida Wang <yidawa@gmail.com>
Date: Fri, 28 Apr 2017 11:45:21 -0700
Subject: [PATCH 6/8] addressing Bryn's comments

---
 brainiak/fcma/preprocessing.py | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/brainiak/fcma/preprocessing.py b/brainiak/fcma/preprocessing.py
index ddf4133c1..c97a4affb 100644
--- a/brainiak/fcma/preprocessing.py
+++ b/brainiak/fcma/preprocessing.py
@@ -107,12 +107,11 @@ def _randomize_single_subject(data, seed=None):
 
     Returns
     data: 2D array in shape [nVoxels, nTRs]
-        Activity data with the voxel dimension shuffled.
+        Activity data is shuffled in the voxel dimension in place.
     """
     if seed is not None:
         np.random.seed(seed)
     np.random.shuffle(data)
-    return data
 
 
 def _randomize_subject_list(data_list, random):
@@ -130,16 +129,16 @@ def _randomize_subject_list(data_list, random):
         Randomization type.
 
     Returns
+    -------
     data_list: list of 2D array in shape [nVxels, nTRs]
-        (Randomized) activity data list.
+        (Randomized) activity data list will be modified in place.
     """
     if random == RandomType.REPRODUCIBLE:
         for i in range(len(data_list)):
-            data_list[i] = _randomize_single_subject(data_list[i], seed=i)
+            _randomize_single_subject(data_list[i], seed=i)
     elif random == RandomType.UNREPRODUCIBLE:
-        for i in range(len(data_list)):
-            data_list[i] = _randomize_single_subject(data_list[i])
-    return data_list
+        for data in data_list:
+            _randomize_single_subject(data)
 
 
 class RandomType(Enum):
@@ -204,11 +203,11 @@ def prepare_fcma_data(images, conditions, mask1, mask2=None,
             activity_data1, activity_data2 = zip(*multimask_images(images,
                                                                    masks,
                                                                    np.float32))
-            activity_data2 = _randomize_subject_list(activity_data2, random)
+            _randomize_subject_list(activity_data2, random)
             raw_data2, _ = _separate_epochs(activity_data2, conditions)
         else:
             activity_data1 = list(mask_images(images, mask1, np.float32))
-        activity_data1 = _randomize_subject_list(activity_data1, random)
+        _randomize_subject_list(activity_data1, random)
         raw_data1, labels = _separate_epochs(activity_data1, conditions)
         time1 = time.time()
     raw_data_length = len(raw_data1)
@@ -377,13 +376,13 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32,
         data = f.get_data().astype(data_type)
         [d1, d2, d3, d4] = data.shape
         if random == RandomType.REPRODUCIBLE:
-            data = _randomize_single_subject(data.reshape((d1 * d2 * d3, d4)),
-                                             seed=sid).reshape((d1,
-                                                                d2,
-                                                                d3,
-                                                                d4))
+            _randomize_single_subject(data.reshape((d1 * d2 * d3, d4)),
+                                      seed=sid).reshape((d1,
+                                                         d2,
+                                                         d3,
+                                                         d4))
         elif random == RandomType.UNREPRODUCIBLE:
-            data = _randomize_single_subject(
+            _randomize_single_subject(
                 data.reshape((d1 * d2 * d3, d4))).reshape((d1,
                                                            d2,
                                                            d3,

From ec78ccd478b771424735702b0e4d0fe8ce5c72e6 Mon Sep 17 00:00:00 2001
From: Yida Wang <yidawa@gmail.com>
Date: Fri, 28 Apr 2017 12:20:12 -0700
Subject: [PATCH 7/8] updating docstrings

---
 brainiak/fcma/preprocessing.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/brainiak/fcma/preprocessing.py b/brainiak/fcma/preprocessing.py
index c97a4affb..333cebc85 100644
--- a/brainiak/fcma/preprocessing.py
+++ b/brainiak/fcma/preprocessing.py
@@ -96,18 +96,18 @@ def _randomize_single_subject(data, seed=None):
     """Randomly permute the voxels of the subject.
 
      The subject is organized as Voxel x TR,
-     this method shuffles the voxel dimension.
+     this method shuffles the voxel dimension in place.
 
     Parameters
     ----------
     data: 2D array in shape [nVoxels, nTRs]
-        Activity data.
+        Activity data to be shuffled.
     seed: Optional[int]
         Seed for random state used implicitly for shuffling.
 
     Returns
-    data: 2D array in shape [nVoxels, nTRs]
-        Activity data is shuffled in the voxel dimension in place.
+    -------
+    None.
     """
     if seed is not None:
         np.random.seed(seed)
@@ -117,21 +117,19 @@ def _randomize_single_subject(data, seed=None):
 def _randomize_subject_list(data_list, random):
     """Randomly permute the voxels of a subject list.
 
-     The method shuffles the subject one by one according to
-     the random type. If RandomType.NORANDOM, return the
-     original list.
+     The method shuffles the subject one by one in place according to
+     the random type. If RandomType.NORANDOM, the list is left unchanged.
 
     Parameters
     ----------
     data_list: list of 2D array in shape [nVxels, nTRs]
-        Activity data list.
+        Activity data list to be shuffled.
     random: RandomType
         Randomization type.
 
     Returns
     -------
-    data_list: list of 2D array in shape [nVxels, nTRs]
-        (Randomized) activity data list will be modified in place.
+    None.
     """
     if random == RandomType.REPRODUCIBLE:
         for i in range(len(data_list)):

From e8d60a82ab2e50ae5435d156f5626e5e1295f07d Mon Sep 17 00:00:00 2001
From: Yida Wang <yidawa@gmail.com>
Date: Fri, 28 Apr 2017 13:48:00 -0700
Subject: [PATCH 8/8] fix building error issue

---
 brainiak/fcma/preprocessing.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/brainiak/fcma/preprocessing.py b/brainiak/fcma/preprocessing.py
index 333cebc85..9ea3121f7 100644
--- a/brainiak/fcma/preprocessing.py
+++ b/brainiak/fcma/preprocessing.py
@@ -374,17 +374,13 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32,
         data = f.get_data().astype(data_type)
         [d1, d2, d3, d4] = data.shape
         if random == RandomType.REPRODUCIBLE:
-            _randomize_single_subject(data.reshape((d1 * d2 * d3, d4)),
-                                      seed=sid).reshape((d1,
-                                                         d2,
-                                                         d3,
-                                                         d4))
+            data = data.reshape((d1 * d2 * d3, d4))
+            _randomize_single_subject(data, seed=sid)
+            data = data.reshape((d1, d2, d3, d4))
         elif random == RandomType.UNREPRODUCIBLE:
-            _randomize_single_subject(
-                data.reshape((d1 * d2 * d3, d4))).reshape((d1,
-                                                           d2,
-                                                           d3,
-                                                           d4))
+            data = data.reshape((d1 * d2 * d3, d4))
+            _randomize_single_subject(data)
+            data = data.reshape((d1, d2, d3, d4))
         if processed_data is None:
             processed_data = np.empty([d1, d2, d3, num_epochs],
                                       dtype=data_type)