New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Random permutation of FCMA #217
Changes from 6 commits
81972f7
507bbf3
5f04e95
cec3445
ec340ec
0e28424
e90671a
11383d1
ec78ccd
e8d60a8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,12 +22,21 @@ | |
import logging | ||
from scipy.stats.mstats import zscore | ||
from mpi4py import MPI | ||
from enum import Enum | ||
|
||
from ..image import mask_images, multimask_images | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
|
||
__all__ = [ | ||
"RandomType", | ||
"prepare_fcma_data", | ||
"generate_epochs_info", | ||
"prepare_mvpa_data", | ||
"prepare_searchlight_mvpa_data", | ||
] | ||
|
||
|
||
def _separate_epochs(activity_data, epoch_list): | ||
""" create data epoch by epoch | ||
|
@@ -37,9 +46,9 @@ def _separate_epochs(activity_data, epoch_list): | |
|
||
Parameters | ||
---------- | ||
activity\_data: list of 2D array in shape [nVoxels, nTRs] | ||
activity_data: list of 2D array in shape [nVoxels, nTRs] | ||
the masked activity data organized in voxel*TR formats of all subjects | ||
epoch\_list: list of 3D array in shape [condition, nEpochs, nTRs] | ||
epoch_list: list of 3D array in shape [condition, nEpochs, nTRs] | ||
specification of epochs and conditions | ||
assuming all subjects have the same number of epochs | ||
len(epoch_list) equals the number of subjects | ||
|
@@ -83,8 +92,72 @@ def _separate_epochs(activity_data, epoch_list): | |
return raw_data, labels | ||
|
||
|
||
def _randomize_single_subject(data, seed=None): | ||
"""Randomly permute the voxels of the subject. | ||
|
||
The subject is organized as Voxel x TR, | ||
this method shuffles the voxel dimension. | ||
|
||
Parameters | ||
---------- | ||
data: 2D array in shape [nVoxels, nTRs] | ||
Activity data. | ||
seed: Optional[int] | ||
Seed for random state used implicitly for shuffling. | ||
|
||
Returns | ||
data: 2D array in shape [nVoxels, nTRs] | ||
Activity data with the voxel dimension shuffled. | ||
""" | ||
if seed is not None: | ||
np.random.seed(seed) | ||
np.random.shuffle(data) | ||
return data | ||
|
||
|
||
def _randomize_subject_list(data_list, random): | ||
"""Randomly permute the voxels of a subject list. | ||
|
||
The method shuffles the subject one by one according to | ||
the random type. If RandomType.NORANDOM, return the | ||
original list. | ||
|
||
Parameters | ||
---------- | ||
data_list: list of 2D array in shape [nVxels, nTRs] | ||
Activity data list. | ||
random: RandomType | ||
Randomization type. | ||
|
||
Returns | ||
data_list: list of 2D array in shape [nVxels, nTRs] | ||
(Randomized) activity data list. | ||
""" | ||
if random == RandomType.REPRODUCIBLE: | ||
for i in range(len(data_list)): | ||
data_list[i] = _randomize_single_subject(data_list[i], seed=i) | ||
elif random == RandomType.UNREPRODUCIBLE: | ||
for i in range(len(data_list)): | ||
data_list[i] = _randomize_single_subject(data_list[i]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is better, but still mixed usage. If you're going to do the change in place, don't return a value. If you're going to return a value, it should be a new value. So if you want to use in-place, it should look like:
So neither _randomize_single_subject nor this function should return a value. Also, note since you're modifying in place, you don't need to assign to indexes, so you can use the simpler for loop. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
return data_list | ||
|
||
|
||
class RandomType(Enum): | ||
"""Define the random types as enumeration | ||
|
||
NORANDOM means do not randomize the data; | ||
REPRODUCIBLE means randomize the data with a fixed seed so that the | ||
permutation holds between different runs; | ||
UNREPRODUCIBLE means truly randomize the data which returns different | ||
results in different runs. | ||
""" | ||
NORANDOM = 0 | ||
REPRODUCIBLE = 1 | ||
UNREPRODUCIBLE = 2 | ||
|
||
|
||
def prepare_fcma_data(images, conditions, mask1, mask2=None, | ||
comm=MPI.COMM_WORLD): | ||
random=RandomType.NORANDOM, comm=MPI.COMM_WORLD): | ||
"""Prepare data for correlation-based computation and analysis. | ||
|
||
Generate epochs of interests, then broadcast to all workers. | ||
|
@@ -102,6 +175,9 @@ def prepare_fcma_data(images, conditions, mask1, mask2=None, | |
If it is not specified, the method will assign None to the returning | ||
variable raw_data2 and the self-correlation on raw_data1 will be | ||
computed | ||
random: Optional[RandomType] | ||
Randomize the data within subject or not. | ||
Default NORANDOM | ||
comm: MPI.Comm | ||
MPI communicator to use for MPI operations. | ||
|
||
|
@@ -122,14 +198,17 @@ def prepare_fcma_data(images, conditions, mask1, mask2=None, | |
raw_data1 = [] | ||
raw_data2 = [] | ||
if rank == 0: | ||
logger.info('start to apply masks and separate epochs') | ||
if mask2 is not None: | ||
masks = (mask1, mask2) | ||
activity_data1, activity_data2 = zip(*multimask_images(images, | ||
masks, | ||
np.float32)) | ||
activity_data2 = _randomize_subject_list(activity_data2, random) | ||
raw_data2, _ = _separate_epochs(activity_data2, conditions) | ||
else: | ||
activity_data1 = list(mask_images(images, mask1, np.float32)) | ||
activity_data1 = _randomize_subject_list(activity_data1, random) | ||
raw_data1, labels = _separate_epochs(activity_data1, conditions) | ||
time1 = time.time() | ||
raw_data_length = len(raw_data1) | ||
|
@@ -170,7 +249,7 @@ def generate_epochs_info(epoch_list): | |
|
||
Returns | ||
------- | ||
epoch\_info: list of tuple (label, sid, start, end). | ||
epoch_info: list of tuple (label, sid, start, end). | ||
label is the condition labels of the epochs; | ||
sid is the subject id, corresponding to the index of raw_data; | ||
start is the start TR of an epoch (inclusive); | ||
|
@@ -250,7 +329,8 @@ def prepare_mvpa_data(images, conditions, mask): | |
return processed_data, labels | ||
|
||
|
||
def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32): | ||
def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32, | ||
random=RandomType.NORANDOM): | ||
""" obtain the data for activity-based voxel selection using Searchlight | ||
|
||
Average the activity within epochs and z-scoring within subject, | ||
|
@@ -266,10 +346,12 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32): | |
Condition specification. | ||
data_type | ||
Type to cast image to. | ||
random: Optional[RandomType] | ||
Randomize the data within subject or not. | ||
|
||
Returns | ||
------- | ||
processed\_data: 4D array in shape [brain 3D + epoch] | ||
processed_data: 4D array in shape [brain 3D + epoch] | ||
averaged epoch by epoch processed data | ||
|
||
labels: 1D array | ||
|
@@ -290,10 +372,23 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32): | |
# counting the epochs per subject for z-scoring | ||
subject_count = np.zeros(len(conditions), dtype=np.int32) | ||
|
||
logger.info('start to apply masks and separate epochs') | ||
for sid, f in enumerate(images): | ||
data = f.get_data().astype(data_type) | ||
[d1, d2, d3, d4] = data.shape | ||
if random == RandomType.REPRODUCIBLE: | ||
data = _randomize_single_subject(data.reshape((d1 * d2 * d3, d4)), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could make this reshaping logic part of _randomize_single_subject (i.e. have it flatten, randomize, then reshape into whatever shape the original array was). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't do this because I would like to make the input argument |
||
seed=sid).reshape((d1, | ||
d2, | ||
d3, | ||
d4)) | ||
elif random == RandomType.UNREPRODUCIBLE: | ||
data = _randomize_single_subject( | ||
data.reshape((d1 * d2 * d3, d4))).reshape((d1, | ||
d2, | ||
d3, | ||
d4)) | ||
if processed_data is None: | ||
[d1, d2, d3, _] = data.shape | ||
processed_data = np.empty([d1, d2, d3, num_epochs], | ||
dtype=data_type) | ||
# averaging | ||
|
@@ -303,9 +398,9 @@ def prepare_searchlight_mvpa_data(images, conditions, data_type=np.float32): | |
processed_data[:, :, :, idx] = \ | ||
np.mean(data[:, :, :, epoch[2]:epoch[3]], axis=3) | ||
|
||
logger.info( | ||
'file %s is loaded and processed, with data shape %s' % | ||
(f.get_filename(), data.shape) | ||
logger.debug( | ||
'file %s is loaded and processed, with data shape %s', | ||
f.get_filename(), data.shape | ||
) | ||
# z-scoring | ||
cur_epoch = 0 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
from pathlib import Path | ||
from typing import Callable, Iterable, List, Union | ||
|
||
import logging | ||
import nibabel as nib | ||
import numpy as np | ||
|
||
|
@@ -31,6 +32,8 @@ | |
|
||
from .image import SingleConditionSpec | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def load_images_from_dir(in_dir: Union[str, Path], suffix: str = "nii.gz", | ||
) -> Iterable[SpatialImage]: | ||
|
@@ -58,6 +61,9 @@ def load_images_from_dir(in_dir: Union[str, Path], suffix: str = "nii.gz", | |
in_dir = Path(in_dir) | ||
files = sorted(in_dir.glob("*" + suffix)) | ||
for f in files: | ||
logger.debug( | ||
'Starting to read file %s', f | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would say you should use the debug level for the messages you added in "io.py". Also, do not break lines if they do not exceed the maximum line length. Finally, the other message placement, before There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done, but I would put some brief There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, but not in "io.py". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not? I think within the io methods are the best places to go. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. anyway, I will leave There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because they are not useful. Why would a function log an info-level message saying it has been called every time it is called? Such messages could be emitted by the caller, if relevant. We can discuss merging the load functions, but I would rather not, because "_dir" requires a suffix as well, so we would need to explain that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. well, the suffix can be an optional argument. Let's discuss this in the next standup. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please write sentences in log messages, at least in this file. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
yield nib.load(str(f)) | ||
|
||
|
||
|
@@ -86,6 +92,9 @@ def load_images(image_paths: Iterable[Union[str, Path]] | |
string_path = str(image_path) | ||
else: | ||
string_path = image_path | ||
logger.debug( | ||
'Starting to read file %s', string_path | ||
) | ||
yield nib.load(string_path) | ||
|
||
|
||
|
@@ -130,7 +139,7 @@ def load_labels(path: Union[str, Path]) -> List[SingleConditionSpec]: | |
List[SingleConditionSpec] | ||
List of SingleConditionSpec stored in labels file. | ||
""" | ||
condition_specs = np.load(path) | ||
condition_specs = np.load(str(path)) | ||
return [c.view(SingleConditionSpec) for c in condition_specs] | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good! Now just need to remove the Returns section from docs of both of these functions since they don't return anything anymore.