From 81972f79261c563f9260d5045755ea80f0c002b3 Mon Sep 17 00:00:00 2001 From: Yida Wang Date: Wed, 26 Apr 2017 22:26:50 -0700 Subject: [PATCH 1/8] add random permutation to FCMA --- brainiak/fcma/preprocessing.py | 110 ++++++++++++++++++++++++-- brainiak/fcma/voxelselector.py | 16 +++- brainiak/io.py | 11 +++ examples/fcma/mvpa_voxel_selection.py | 15 +++- examples/fcma/voxel_selection.py | 16 +++- tests/fcma/test_preprocessing.py | 36 ++++++++- 6 files changed, 189 insertions(+), 15 deletions(-) diff --git a/brainiak/fcma/preprocessing.py b/brainiak/fcma/preprocessing.py index 5a620237d..4872c7739 100644 --- a/brainiak/fcma/preprocessing.py +++ b/brainiak/fcma/preprocessing.py @@ -22,12 +22,21 @@ import logging from scipy.stats.mstats import zscore from mpi4py import MPI +from enum import Enum from ..image import mask_images, multimask_images logger = logging.getLogger(__name__) +__all__ = [ + "RandomType", + "prepare_fcma_data", + "generate_epochs_info", + "prepare_mvpa_data", + "prepare_searchlight_mvpa_data", +] + def _separate_epochs(activity_data, epoch_list): """ create data epoch by epoch @@ -37,9 +46,9 @@ def _separate_epochs(activity_data, epoch_list): Parameters ---------- - activity\_data: list of 2D array in shape [nVoxels, nTRs] + activity_data: list of 2D array in shape [nVoxels, nTRs] the masked activity data organized in voxel*TR formats of all subjects - epoch\_list: list of 3D array in shape [condition, nEpochs, nTRs] + epoch_list: list of 3D array in shape [condition, nEpochs, nTRs] specification of epochs and conditions assuming all subjects have the same number of epochs len(epoch_list) equals the number of subjects @@ -83,8 +92,72 @@ def _separate_epochs(activity_data, epoch_list): return raw_data, labels +def _randomize_single_subject(data, seed=None): + """Randomly permute the voxels of the subject. + + The subject is organized as Voxel x TR, + this method shuffles the voxel dimension. + + Parameters + ---------- + data: 2D array in shape [nVxels, nTRs] + Activity data. + seed: Optional[int] + Seed for random state used implicitly for shuffling. + + Returns + data: 2D array in shape [nVxels, nTRs] + Activity data with the voxel dimension shuffled. + """ + if seed is not None: + np.random.seed(seed) + np.random.shuffle(data) + return data + + +def _randomize_subject_list(data_list, random): + """Randomly permute the voxels of a subject list. + + The method shuffles the subject one by one according to + the random type. If RandomType.NORANDOM, return the + original list. + + Parameters + ---------- + data_list: list of 2D array in shape [nVxels, nTRs] + Activity data list. + random: RandomType + Randomization type. + + Returns + data_list: list of 2D array in shape [nVxels, nTRs] + (Randomized) activity data list. + """ + if random == RandomType.REPRODUCIBLE: + data_list = [_randomize_single_subject(data, seed=idx) + for idx, data in enumerate(data_list)] + elif random == RandomType.UNREPRODUCIBLE: + data_list = [_randomize_single_subject(data) + for data in data_list] + return data_list + + +class RandomType(Enum): + """Define the random types as enumeration + + NORANDOM means do not randomize the data; + REPRODUCIBLE means randomize the data with a fixed seed so that the + permutation holds between different runs; + UNREPRODUCIBLE means truly randomize the data which returns different + results in different runs. + """ + NORANDOM = 0 + REPRODUCIBLE = 1 + UNREPRODUCIBLE = 2 + + def prepare_fcma_data(images, conditions, mask1, mask2=None, - comm=MPI.COMM_WORLD): + random=RandomType.NORANDOM, comm=MPI.COMM_WORLD): """Prepare data for correlation-based computation and analysis. Generate epochs of interests, then broadcast to all workers. @@ -102,6 +175,9 @@ def prepare_fcma_data(images, conditions, mask1, mask2=None, If it is not specified, the method will assign None to the returning variable raw_data2 and the self-correlation on raw_data1 will be computed + random: Optional[RandomType] + Randomize the data within subject or not. + Default NORANDOM comm: MPI.Comm MPI communicator to use for MPI operations. @@ -127,9 +203,11 @@ def prepare_fcma_data(images, conditions, mask1, mask2=None, activity_data1, activity_data2 = zip(*multimask_images(images, masks, np.float32)) + _randomize_subject_list(activity_data2, random) raw_data2, _ = _separate_epochs(activity_data2, conditions) else: activity_data1 = list(mask_images(images, mask1, np.float32)) + _randomize_subject_list(activity_data1, random) raw_data1, labels = _separate_epochs(activity_data1, conditions) time1 = time.time() raw_data_length = len(raw_data1) @@ -170,7 +248,7 @@ def generate_epochs_info(epoch_list): Returns ------- - epoch\_info: list of tuple (label, sid, start, end). + epoch_info: list of tuple (label, sid, start, end). label is the condition labels of the epochs; sid is the subject id, corresponding to the index of raw_data; start is the start TR of an epoch (inclusive); @@ -250,7 +328,8 @@ def prepare_mvpa_data(images, conditions, mask): return processed_data, labels -def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32): +def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32, + random=RandomType.NORANDOM): """ obtain the data for activity-based voxel selection using Searchlight Average the activity within epochs and z-scoring within subject, @@ -266,10 +345,13 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32): Condition specification. data_type Type to cast image to. + random: Optional[RandomType] + Randomize the data within subject or not. + Default NORANDOM Returns ------- - processed\_data: 4D array in shape [brain 3D + epoch] + processed_data: 4D array in shape [brain 3D + epoch] averaged epoch by epoch processed data labels: 1D array @@ -292,8 +374,20 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32): for sid, f in enumerate(images): data = f.get_data().astype(data_type) + [d1, d2, d3, d4] = data.shape + if random == RandomType.REPRODUCIBLE: + data = _randomize_single_subject(data.reshape((d1 * d2 * d3, d4)), + seed=sid).reshape((d1, + d2, + d3, + d4)) + elif random == RandomType.UNREPRODUCIBLE: + data = _randomize_single_subject( + data.reshape((d1 * d2 * d3, d4))).reshape((d1, + d2, + d3, + d4)) if processed_data is None: - [d1, d2, d3, _] = data.shape processed_data = np.empty([d1, d2, d3, num_epochs], dtype=data_type) # averaging @@ -303,7 +397,7 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32): processed_data[:, :, :, idx] = \ np.mean(data[:, :, :, epoch[2]:epoch[3]], axis=3) - logger.info( + logger.debug( 'file %s is loaded and processed, with data shape %s' % (f.get_filename(), data.shape) ) diff --git a/brainiak/fcma/voxelselector.py b/brainiak/fcma/voxelselector.py index 78ea330a6..ea3331515 100644 --- a/brainiak/fcma/voxelselector.py +++ b/brainiak/fcma/voxelselector.py @@ -163,7 +163,8 @@ def _master(self): the length of array equals the number of voxels """ logger.info( - 'Master starts to allocate tasks' + 'Master at rank %d starts to allocate tasks' % + MPI.COMM_WORLD.Get_rank() ) results = [] comm = MPI.COMM_WORLD @@ -181,6 +182,10 @@ def _master(self): if current_task[1] == 0: using_size = i break + logger.debug( + 'master starts to send a task to worker %d' % + i + ) comm.send(current_task, dest=i, tag=self._WORKTAG) @@ -235,6 +240,10 @@ def _worker(self, clf): ------- None """ + logger.debug( + 'worker %d is running, waiting for tasks from master at rank %d' % + (MPI.COMM_WORLD.Get_rank(), self.master_rank) + ) comm = MPI.COMM_WORLD status = MPI.Status() while 1: @@ -264,6 +273,11 @@ def _correlation_computation(self, task): time1 = time.time() s = task[0] nEpochs = len(self.raw_data) + logger.debug( + 'start to compute the correlation: #epochs: %d, ' + '#processed voxels: %d, #total voxels to compute against: %d' % + (nEpochs, task[1], self.num_voxels2) + ) corr = np.zeros((task[1], nEpochs, self.num_voxels2), np.float32, order='C') count = 0 diff --git a/brainiak/io.py b/brainiak/io.py index 62937bdae..715a6b78f 100644 --- a/brainiak/io.py +++ b/brainiak/io.py @@ -25,12 +25,15 @@ import nibabel as nib import numpy as np +import logging from nibabel.nifti1 import Nifti1Pair from nibabel.spatialimages import SpatialImage from .image import SingleConditionSpec +logger = logging.getLogger(__name__) + def load_images_from_dir(in_dir: Union[str, Path], suffix: str = "nii.gz", ) -> Iterable[SpatialImage]: @@ -59,6 +62,10 @@ def load_images_from_dir(in_dir: Union[str, Path], suffix: str = "nii.gz", files = sorted(in_dir.glob("*" + suffix)) for f in files: yield nib.load(str(f)) + logger.info( + 'file %s is read' % + str(f) + ) def load_images(image_paths: Iterable[Union[str, Path]] @@ -86,6 +93,10 @@ def load_images(image_paths: Iterable[Union[str, Path]] string_path = str(image_path) else: string_path = image_path + logger.info( + 'starts to read file %s' % + string_path + ) yield nib.load(string_path) diff --git a/examples/fcma/mvpa_voxel_selection.py b/examples/fcma/mvpa_voxel_selection.py index 1892d5e80..2327579e3 100644 --- a/examples/fcma/mvpa_voxel_selection.py +++ b/examples/fcma/mvpa_voxel_selection.py @@ -53,11 +53,24 @@ 'mask size: %d' % np.sum(mask) ) - images = io.load_images_from_dir(data_dir, extension) + images = io.load_images_from_dir(data_dir, suffix=extension) conditions = io.load_labels(epoch_file) data, labels = prepare_searchlight_mvpa_data(images, conditions) + + # setting the random argument produces random voxel selection results + # for non-parametric statistical analysis. + # There are three random options: + # RandomType.NORANDOM is the default + # RandomType.REPRODUCIBLE permutes the voxels in the same way every run + # RandomType.UNREPRODUCIBLE permutes the voxels differently across runs + # example + #from brainiak.fcma.preprocessing import RandomType + #data, labels = prepare_searchlight_mvpa_data(images, conditions, + # random=RandomType.UNREPRODUCIBLE) + # the following line is an example to leaving a subject out #epoch_info = [x for x in epoch_info if x[1] != 0] + num_subjs = int(sys.argv[5]) # create a Searchlight object sl = Searchlight(sl_rad=1) diff --git a/examples/fcma/voxel_selection.py b/examples/fcma/voxel_selection.py index 27c570181..1d03e050b 100644 --- a/examples/fcma/voxel_selection.py +++ b/examples/fcma/voxel_selection.py @@ -41,16 +41,30 @@ extension = sys.argv[2] mask_file = sys.argv[3] epoch_file = sys.argv[4] - images = io.load_images_from_dir(data_dir, extension) + images = io.load_images_from_dir(data_dir, suffix=extension) mask = io.load_boolean_mask(mask_file) conditions = io.load_labels(epoch_file) raw_data, _, labels = prepare_fcma_data(images, conditions, mask) + + # setting the random argument produces random voxel selection results + # for non-parametric statistical analysis. + # There are three random options: + # RandomType.NORANDOM is the default + # RandomType.REPRODUCIBLE permutes the voxels in the same way every run + # RandomType.UNREPRODUCIBLE permutes the voxels differently across runs + # example: + # from brainiak.fcma.preprocessing import RandomType + # raw_data, _, labels = prepare_fcma_data(images, conditions, mask, + # random=RandomType.REPRODUCIBLE) + # if providing two masks, just append the second mask as the last input argument # and specify raw_data2 + # example: # images = io.load_images_from_dir(data_dir, extension) # mask2 = io.load_boolean_mask('face_scene/mask.nii.gz') # raw_data, raw_data2, labels = prepare_fcma_data(images, conditions, mask, # mask2) + epochs_per_subj = int(sys.argv[5]) num_subjs = int(sys.argv[6]) # the following line is an example to leaving a subject out diff --git a/tests/fcma/test_preprocessing.py b/tests/fcma/test_preprocessing.py index 554b95e34..908149610 100644 --- a/tests/fcma/test_preprocessing.py +++ b/tests/fcma/test_preprocessing.py @@ -29,7 +29,7 @@ def test_prepare_fcma_data(): - images = io.load_images_from_dir(data_dir, suffix) + images = io.load_images_from_dir(data_dir, suffix=suffix) mask = io.load_boolean_mask(mask_file) conditions = io.load_labels(epoch_file) raw_data, _, labels = prepare_fcma_data(images, conditions, mask) @@ -41,9 +41,22 @@ def test_prepare_fcma_data(): 'raw data do not match in test_prepare_fcma_data' assert np.array_equal(labels, expected_labels), \ 'the labels do not match in test_prepare_fcma_data' + from brainiak.fcma.preprocessing import RandomType + images = io.load_images_from_dir(data_dir, suffix=suffix) + random_raw_data, _, _ = prepare_fcma_data(images, conditions, mask, + random= + RandomType.REPRODUCIBLE) + assert len(random_raw_data) == len(expected_raw_data), \ + 'numbers of epochs do not match in test_prepare_fcma_data' + images = io.load_images_from_dir(data_dir, suffix=suffix) + random_raw_data, _, _ = prepare_fcma_data(images, conditions, mask, + random= + RandomType.UNREPRODUCIBLE) + assert len(random_raw_data) == len(expected_raw_data), \ + 'numbers of epochs do not match in test_prepare_fcma_data' def test_prepare_mvpa_data(): - images = io.load_images_from_dir(data_dir, suffix) + images = io.load_images_from_dir(data_dir, suffix=suffix) mask = io.load_boolean_mask(mask_file) conditions = io.load_labels(epoch_file) processed_data, labels = prepare_mvpa_data(images, conditions, mask) @@ -58,10 +71,10 @@ def test_prepare_mvpa_data(): 'the labels do not match in test_prepare_mvpa_data' def test_prepare_searchlight_mvpa_data(): - images = io.load_images_from_dir(data_dir, suffix) + images = io.load_images_from_dir(data_dir, suffix=suffix) conditions = io.load_labels(epoch_file) processed_data, labels = prepare_searchlight_mvpa_data(images, - conditions) + conditions) expected_searchlight_processed_data = np.load( expected_dir / 'expected_searchlight_processed_data.npy') for idx in range(len(processed_data)): @@ -69,6 +82,21 @@ def test_prepare_searchlight_mvpa_data(): 'raw data do not match in test_prepare_searchlight_mvpa_data' assert np.array_equal(labels, expected_labels), \ 'the labels do not match in test_prepare_searchlight_mvpa_data' + from brainiak.fcma.preprocessing import RandomType + images = io.load_images_from_dir(data_dir, suffix=suffix) + random_processed_data, _ = prepare_searchlight_mvpa_data(images, + conditions, + random= + RandomType.REPRODUCIBLE) + assert len(random_processed_data) == len(expected_searchlight_processed_data), \ + 'numbers of epochs do not match in test_prepare_searchlight_mvpa_data' + images = io.load_images_from_dir(data_dir, suffix=suffix) + random_processed_data, _ = prepare_searchlight_mvpa_data(images, + conditions, + random= + RandomType.UNREPRODUCIBLE) + assert len(random_processed_data) == len(expected_searchlight_processed_data), \ + 'numbers of epochs do not match in test_prepare_searchlight_mvpa_data' if __name__ == '__main__': test_prepare_fcma_data() From 507bbf323208d45240e1c8dccf561e3df207d76a Mon Sep 17 00:00:00 2001 From: Yida Wang Date: Thu, 27 Apr 2017 10:00:00 -0700 Subject: [PATCH 2/8] addressing review comments from Bryn --- brainiak/fcma/preprocessing.py | 20 ++++++++++---------- brainiak/fcma/voxelselector.py | 2 +- brainiak/io.py | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/brainiak/fcma/preprocessing.py b/brainiak/fcma/preprocessing.py index 4872c7739..f5e761d12 100644 --- a/brainiak/fcma/preprocessing.py +++ b/brainiak/fcma/preprocessing.py @@ -100,13 +100,13 @@ def _randomize_single_subject(data, seed=None): Parameters ---------- - data: 2D array in shape [nVxels, nTRs] + data: 2D array in shape [nVoxels, nTRs] Activity data. seed: Optional[int] Seed for random state used implicitly for shuffling. Returns - data: 2D array in shape [nVxels, nTRs] + data: 2D array in shape [nVoxels, nTRs] Activity data with the voxel dimension shuffled. """ if seed is not None: @@ -134,11 +134,11 @@ def _randomize_subject_list(data_list, random): (Randomized) activity data list. """ if random == RandomType.REPRODUCIBLE: - data_list = [_randomize_single_subject(data, seed=idx) - for idx, data in enumerate(data_list)] + for i in range(len(data_list)): + data_list[i] = _randomize_single_subject(data_list[i], seed=i) elif random == RandomType.UNREPRODUCIBLE: - data_list = [_randomize_single_subject(data) - for data in data_list] + for i in range(len(data_list)): + data_list[i] = _randomize_single_subject(data_list[i]) return data_list @@ -203,11 +203,11 @@ def prepare_fcma_data(images, conditions, mask1, mask2=None, activity_data1, activity_data2 = zip(*multimask_images(images, masks, np.float32)) - _randomize_subject_list(activity_data2, random) + activity_data2 = _randomize_subject_list(activity_data2, random) raw_data2, _ = _separate_epochs(activity_data2, conditions) else: activity_data1 = list(mask_images(images, mask1, np.float32)) - _randomize_subject_list(activity_data1, random) + activity_data1 = _randomize_subject_list(activity_data1, random) raw_data1, labels = _separate_epochs(activity_data1, conditions) time1 = time.time() raw_data_length = len(raw_data1) @@ -398,8 +398,8 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32, np.mean(data[:, :, :, epoch[2]:epoch[3]], axis=3) logger.debug( - 'file %s is loaded and processed, with data shape %s' % - (f.get_filename(), data.shape) + 'file %s is loaded and processed, with data shape %s', + f.get_filename(), data.shape ) # z-scoring cur_epoch = 0 diff --git a/brainiak/fcma/voxelselector.py b/brainiak/fcma/voxelselector.py index ea3331515..9cfd0693a 100644 --- a/brainiak/fcma/voxelselector.py +++ b/brainiak/fcma/voxelselector.py @@ -163,7 +163,7 @@ def _master(self): the length of array equals the number of voxels """ logger.info( - 'Master at rank %d starts to allocate tasks' % + 'Master at rank %d starts to allocate tasks', MPI.COMM_WORLD.Get_rank() ) results = [] diff --git a/brainiak/io.py b/brainiak/io.py index 715a6b78f..070119ae7 100644 --- a/brainiak/io.py +++ b/brainiak/io.py @@ -63,8 +63,8 @@ def load_images_from_dir(in_dir: Union[str, Path], suffix: str = "nii.gz", for f in files: yield nib.load(str(f)) logger.info( - 'file %s is read' % - str(f) + 'file %s is read', + f ) @@ -141,7 +141,7 @@ def load_labels(path: Union[str, Path]) -> List[SingleConditionSpec]: List[SingleConditionSpec] List of SingleConditionSpec stored in labels file. """ - condition_specs = np.load(path) + condition_specs = np.load(str(path)) return [c.view(SingleConditionSpec) for c in condition_specs] From 5f04e95705e01df12457988b1c22af97dba1b5ec Mon Sep 17 00:00:00 2001 From: Yida Wang Date: Thu, 27 Apr 2017 11:21:56 -0700 Subject: [PATCH 3/8] addressing review comments from Mihai --- brainiak/fcma/preprocessing.py | 3 ++- brainiak/io.py | 14 ++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/brainiak/fcma/preprocessing.py b/brainiak/fcma/preprocessing.py index f5e761d12..ddf4133c1 100644 --- a/brainiak/fcma/preprocessing.py +++ b/brainiak/fcma/preprocessing.py @@ -198,6 +198,7 @@ def prepare_fcma_data(images, conditions, mask1, mask2=None, raw_data1 = [] raw_data2 = [] if rank == 0: + logger.info('start to apply masks and separate epochs') if mask2 is not None: masks = (mask1, mask2) activity_data1, activity_data2 = zip(*multimask_images(images, @@ -347,7 +348,6 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32, Type to cast image to. random: Optional[RandomType] Randomize the data within subject or not. - Default NORANDOM Returns ------- @@ -372,6 +372,7 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32, # counting the epochs per subject for z-scoring subject_count = np.zeros(len(conditions), dtype=np.int32) + logger.info('start to apply masks and separate epochs') for sid, f in enumerate(images): data = f.get_data().astype(data_type) [d1, d2, d3, d4] = data.shape diff --git a/brainiak/io.py b/brainiak/io.py index 070119ae7..b4e71bd25 100644 --- a/brainiak/io.py +++ b/brainiak/io.py @@ -23,9 +23,9 @@ from pathlib import Path from typing import Callable, Iterable, List, Union +import logging import nibabel as nib import numpy as np -import logging from nibabel.nifti1 import Nifti1Pair from nibabel.spatialimages import SpatialImage @@ -61,11 +61,10 @@ def load_images_from_dir(in_dir: Union[str, Path], suffix: str = "nii.gz", in_dir = Path(in_dir) files = sorted(in_dir.glob("*" + suffix)) for f in files: - yield nib.load(str(f)) - logger.info( - 'file %s is read', - f + logger.debug( + 'starts to read file %s', f ) + yield nib.load(str(f)) def load_images(image_paths: Iterable[Union[str, Path]] @@ -93,9 +92,8 @@ def load_images(image_paths: Iterable[Union[str, Path]] string_path = str(image_path) else: string_path = image_path - logger.info( - 'starts to read file %s' % - string_path + logger.debug( + 'starts to read file %s', string_path ) yield nib.load(string_path) From cec34452d089c12533da4d5e85b775b141958573 Mon Sep 17 00:00:00 2001 From: Yida Wang Date: Thu, 27 Apr 2017 11:23:31 -0700 Subject: [PATCH 4/8] further changes according to the comments --- examples/fcma/mvpa_voxel_selection.py | 4 ++-- examples/fcma/voxel_selection.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/fcma/mvpa_voxel_selection.py b/examples/fcma/mvpa_voxel_selection.py index 2327579e3..807d1f520 100644 --- a/examples/fcma/mvpa_voxel_selection.py +++ b/examples/fcma/mvpa_voxel_selection.py @@ -39,7 +39,7 @@ MPI.COMM_WORLD.Get_size() ) data_dir = sys.argv[1] - extension = sys.argv[2] + suffix = sys.argv[2] mask_file = sys.argv[3] epoch_file = sys.argv[4] @@ -53,7 +53,7 @@ 'mask size: %d' % np.sum(mask) ) - images = io.load_images_from_dir(data_dir, suffix=extension) + images = io.load_images_from_dir(data_dir, suffix=suffix) conditions = io.load_labels(epoch_file) data, labels = prepare_searchlight_mvpa_data(images, conditions) diff --git a/examples/fcma/voxel_selection.py b/examples/fcma/voxel_selection.py index 1d03e050b..a1efd6c8f 100644 --- a/examples/fcma/voxel_selection.py +++ b/examples/fcma/voxel_selection.py @@ -38,10 +38,10 @@ MPI.COMM_WORLD.Get_size() ) data_dir = sys.argv[1] - extension = sys.argv[2] + suffix = sys.argv[2] mask_file = sys.argv[3] epoch_file = sys.argv[4] - images = io.load_images_from_dir(data_dir, suffix=extension) + images = io.load_images_from_dir(data_dir, suffix=suffix) mask = io.load_boolean_mask(mask_file) conditions = io.load_labels(epoch_file) raw_data, _, labels = prepare_fcma_data(images, conditions, mask) From ec340ecc267f14263768c7dd1fd71e99f7a1857a Mon Sep 17 00:00:00 2001 From: Yida Wang Date: Thu, 27 Apr 2017 13:10:57 -0700 Subject: [PATCH 5/8] addressing review comments --- brainiak/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/brainiak/io.py b/brainiak/io.py index b4e71bd25..50e32f12a 100644 --- a/brainiak/io.py +++ b/brainiak/io.py @@ -62,7 +62,7 @@ def load_images_from_dir(in_dir: Union[str, Path], suffix: str = "nii.gz", files = sorted(in_dir.glob("*" + suffix)) for f in files: logger.debug( - 'starts to read file %s', f + 'Starting to read file %s', f ) yield nib.load(str(f)) @@ -93,7 +93,7 @@ def load_images(image_paths: Iterable[Union[str, Path]] else: string_path = image_path logger.debug( - 'starts to read file %s', string_path + 'Starting to read file %s', string_path ) yield nib.load(string_path) From 11383d17e26cc60e4e64c9b5ebdc98c1483bdda4 Mon Sep 17 00:00:00 2001 From: Yida Wang Date: Fri, 28 Apr 2017 11:45:21 -0700 Subject: [PATCH 6/8] addressing Bryn's comments --- brainiak/fcma/preprocessing.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/brainiak/fcma/preprocessing.py b/brainiak/fcma/preprocessing.py index ddf4133c1..c97a4affb 100644 --- a/brainiak/fcma/preprocessing.py +++ b/brainiak/fcma/preprocessing.py @@ -107,12 +107,11 @@ def _randomize_single_subject(data, seed=None): Returns data: 2D array in shape [nVoxels, nTRs] - Activity data with the voxel dimension shuffled. + Activity data is shuffled in the voxel dimension in place. """ if seed is not None: np.random.seed(seed) np.random.shuffle(data) - return data def _randomize_subject_list(data_list, random): @@ -130,16 +129,16 @@ def _randomize_subject_list(data_list, random): Randomization type. Returns + ------- data_list: list of 2D array in shape [nVxels, nTRs] - (Randomized) activity data list. + (Randomized) activity data list will be modified in place. """ if random == RandomType.REPRODUCIBLE: for i in range(len(data_list)): - data_list[i] = _randomize_single_subject(data_list[i], seed=i) + _randomize_single_subject(data_list[i], seed=i) elif random == RandomType.UNREPRODUCIBLE: - for i in range(len(data_list)): - data_list[i] = _randomize_single_subject(data_list[i]) - return data_list + for data in data_list: + _randomize_single_subject(data) class RandomType(Enum): @@ -204,11 +203,11 @@ def prepare_fcma_data(images, conditions, mask1, mask2=None, activity_data1, activity_data2 = zip(*multimask_images(images, masks, np.float32)) - activity_data2 = _randomize_subject_list(activity_data2, random) + _randomize_subject_list(activity_data2, random) raw_data2, _ = _separate_epochs(activity_data2, conditions) else: activity_data1 = list(mask_images(images, mask1, np.float32)) - activity_data1 = _randomize_subject_list(activity_data1, random) + _randomize_subject_list(activity_data1, random) raw_data1, labels = _separate_epochs(activity_data1, conditions) time1 = time.time() raw_data_length = len(raw_data1) @@ -377,13 +376,13 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32, data = f.get_data().astype(data_type) [d1, d2, d3, d4] = data.shape if random == RandomType.REPRODUCIBLE: - data = _randomize_single_subject(data.reshape((d1 * d2 * d3, d4)), - seed=sid).reshape((d1, - d2, - d3, - d4)) + _randomize_single_subject(data.reshape((d1 * d2 * d3, d4)), + seed=sid).reshape((d1, + d2, + d3, + d4)) elif random == RandomType.UNREPRODUCIBLE: - data = _randomize_single_subject( + _randomize_single_subject( data.reshape((d1 * d2 * d3, d4))).reshape((d1, d2, d3, From ec78ccd478b771424735702b0e4d0fe8ce5c72e6 Mon Sep 17 00:00:00 2001 From: Yida Wang Date: Fri, 28 Apr 2017 12:20:12 -0700 Subject: [PATCH 7/8] updating docstrings --- brainiak/fcma/preprocessing.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/brainiak/fcma/preprocessing.py b/brainiak/fcma/preprocessing.py index c97a4affb..333cebc85 100644 --- a/brainiak/fcma/preprocessing.py +++ b/brainiak/fcma/preprocessing.py @@ -96,18 +96,18 @@ def _randomize_single_subject(data, seed=None): """Randomly permute the voxels of the subject. The subject is organized as Voxel x TR, - this method shuffles the voxel dimension. + this method shuffles the voxel dimension in place. Parameters ---------- data: 2D array in shape [nVoxels, nTRs] - Activity data. + Activity data to be shuffled. seed: Optional[int] Seed for random state used implicitly for shuffling. Returns - data: 2D array in shape [nVoxels, nTRs] - Activity data is shuffled in the voxel dimension in place. + ------- + None. """ if seed is not None: np.random.seed(seed) @@ -117,21 +117,19 @@ def _randomize_single_subject(data, seed=None): def _randomize_subject_list(data_list, random): """Randomly permute the voxels of a subject list. - The method shuffles the subject one by one according to - the random type. If RandomType.NORANDOM, return the - original list. + The method shuffles the subject one by one in place according to + the random type. If RandomType.NORANDOM, return the original list. Parameters ---------- data_list: list of 2D array in shape [nVxels, nTRs] - Activity data list. + Activity data list to be shuffled. random: RandomType Randomization type. Returns ------- - data_list: list of 2D array in shape [nVxels, nTRs] - (Randomized) activity data list will be modified in place. + None. """ if random == RandomType.REPRODUCIBLE: for i in range(len(data_list)): From e8d60a82ab2e50ae5435d156f5626e5e1295f07d Mon Sep 17 00:00:00 2001 From: Yida Wang Date: Fri, 28 Apr 2017 13:48:00 -0700 Subject: [PATCH 8/8] fix building error issue --- brainiak/fcma/preprocessing.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/brainiak/fcma/preprocessing.py b/brainiak/fcma/preprocessing.py index 333cebc85..9ea3121f7 100644 --- a/brainiak/fcma/preprocessing.py +++ b/brainiak/fcma/preprocessing.py @@ -374,17 +374,13 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32, data = f.get_data().astype(data_type) [d1, d2, d3, d4] = data.shape if random == RandomType.REPRODUCIBLE: - _randomize_single_subject(data.reshape((d1 * d2 * d3, d4)), - seed=sid).reshape((d1, - d2, - d3, - d4)) + data = data.reshape((d1 * d2 * d3, d4)) + _randomize_single_subject(data, seed=sid) + data = data.reshape((d1, d2, d3, d4)) elif random == RandomType.UNREPRODUCIBLE: - _randomize_single_subject( - data.reshape((d1 * d2 * d3, d4))).reshape((d1, - d2, - d3, - d4)) + data = data.reshape((d1 * d2 * d3, d4)) + _randomize_single_subject(data) + data = data.reshape((d1, d2, d3, d4)) if processed_data is None: processed_data = np.empty([d1, d2, d3, num_epochs], dtype=data_type)