Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fcma mvpa searchlight #154

Merged
merged 10 commits into from Jan 12, 2017
6 changes: 6 additions & 0 deletions .gitignore
Expand Up @@ -7,6 +7,9 @@
*.o
*.npy
*.log
*.nii.gz
*.zip
*.pkl

brainiak.egg-info
build
Expand All @@ -21,6 +24,9 @@ htmlcov
.ipynb_checkpoints
junit-*.xml
__pycache__
.DS_Store
__MACOSX

brainiak/fcma/cython_blas.c
brainiak/eventseg/_utils.c
brainiak/examples/fcma/face_scene/
19 changes: 18 additions & 1 deletion brainiak/fcma/__init__.py
Expand Up @@ -11,4 +11,21 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Full correlation matrix analysis"""
"""Full correlation matrix analysis

The implementation is based on the following publications:

.. [Wang2015-1] Full correlation matrix analysis (FCMA): An unbiased method for
task-related functional connectivity",
Yida Wang, Jonathan D Cohen, Kai Li, Nicholas B Turk-Browne.
Journal of Neuroscience Methods, 2015.

.. [Wang2015-2] "Full correlation matrix analysis of fMRI data on Intel® Xeon
Phi™ coprocessors",
Yida Wang, Michael J. Anderson, Jonathan D. Cohen, Alexander Heinecke,
Kai Li, Nadathur Satish, Narayanan Sundaram, Nicholas B. Turk-Browne,
Theodore L. Willke.
In Proceedings of the International Conference for
High Performance Computing,
Networking, Storage and Analysis. 2015.
"""
7 changes: 1 addition & 6 deletions brainiak/fcma/classifier.py
Expand Up @@ -13,12 +13,7 @@
# limitations under the License.
"""Full Correlation Matrix Analysis (FCMA)

This implementation is based on the following publications:

.. [Wang2015] Full correlation matrix analysis (FCMA): An unbiased method for
task-related functional connectivity",
Yida Wang, Jonathan D Cohen, Kai Li, Nicholas B Turk-Browne.
Journal of Neuroscience Methods, 2015.
Correlation-based training and prediction
"""

# Authors: Yida Wang
Expand Down
187 changes: 187 additions & 0 deletions brainiak/fcma/mvpa_voxelselector.py
@@ -0,0 +1,187 @@
# Copyright 2016 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Full Correlation Matrix Analysis (FCMA)

Activity-based voxel selection
"""

# Authors: Yida Wang
# (Intel Labs), 2017

import numpy as np
from sklearn import model_selection
from scipy.stats.mstats import zscore
import logging
from mpi4py import MPI

logger = logging.getLogger(__name__)

__all__ = [
"MVPAVoxelSelector",
]


class MVPAVoxelSelector:
"""Activity-based voxel selection component of FCMA

Parameters
----------

raw\_data: list of 4D array
each element of the list is the raw data of a subject,
in the shape of [x, y, z, t]

mask: 3D array

epoch\_info: list of tuple (label, sid, start, end).
label is the condition labels of the epochs;
sid is the subject id, corresponding to the index of raw_data;
start is the start TR of an epoch (inclusive);
end is the end TR of an epoch(exclusive).
Assuming len(labels) labels equals the number of epochs and
the epochs of the same sid are adjacent in epoch_info

num\_folds: int
the number of folds to be conducted in the cross validation

sl: Searchlight
the distributed Searchlight object

processed\_data\_: 4D array in shape [brain 3D + epoch]
contains the averaged and normalized brain data epoch by epoch
it is generated from raw\_data and epoch\_info

labels\_: 1D array
contains the labels of the epochs, extracted from epoch\_info
"""
def __init__(self,
raw_data,
mask,
epoch_info,
num_folds,
sl
):
self.raw_data = raw_data
self.mask = mask.astype(np.bool)
self.epoch_info = epoch_info
self.num_folds = num_folds
self.sl = sl
self.processed_data_ = None
self.labels_ = None
num_voxels = np.sum(self.mask)
if num_voxels == 0:
raise ValueError('Zero processed voxels')

def _preprocess_data(self):
""" process the raw data according to epoch info

This is done in rank 0 which has the raw_data read in
Average the activity within epochs and z-scoring within subject.
Write the results to self.processed_data,
which is a 4D array of averaged epoch by epoch processed data
Also write the labels to self.label as a 1D numpy array
"""
logger.info(
'mask size: %d' %
np.sum(self.mask)
)
num_epochs = len(self.epoch_info)
(d1, d2, d3, _) = self.raw_data[0].shape
self.processed_data_ = np.empty([d1, d2, d3, num_epochs])
self.labels_ = np.empty(num_epochs)
subject_count = [0] # counting the epochs per subject for z-scoring
cur_sid = -1
# averaging
for idx, epoch in enumerate(self.epoch_info):
self.labels_[idx] = epoch[0]
if cur_sid != epoch[1]:
subject_count.append(0)
cur_sid = epoch[1]
subject_count[-1] += 1
self.processed_data_[:, :, :, idx] = \
np.mean(self.raw_data[cur_sid][:, :, :, epoch[2]:epoch[3]],
axis=3)
# z-scoring
cur_epoch = 0
for i in subject_count:
if i > 1:
self.processed_data_[:, :, :, cur_epoch:cur_epoch + i] = \
zscore(self.processed_data_[:, :, :,
cur_epoch:cur_epoch + i],
axis=3, ddof=0)
cur_epoch += i
# if zscore fails (standard deviation is zero),
# set all values to be zero
self.processed_data_ = np.nan_to_num(self.processed_data_)

def run(self, clf):
""" run activity-based voxel selection

Sort the voxels based on the cross-validation accuracy
of their activity vectors within the searchlight

Parameters
----------
clf: classification function
the classifier to be used in cross validation

Returns
-------
result_volume: 3D array of accuracy numbers
contains the voxelwise accuracy numbers obtained via Searchlight
results: list of tuple (voxel_id, accuracy)
the accuracy numbers of all voxels, in accuracy descending order
the length of array equals the number of voxels
"""
rank = MPI.COMM_WORLD.Get_rank()
if rank == 0:
logger.info(
'running activity-based voxel selection via Searchlight'
)
if rank == 0:
self._preprocess_data()
self.sl.distribute([self.processed_data_], self.mask)
self.sl.broadcast((self.labels_, self.num_folds))
if rank == 0:
logger.info(
'data preparation done'
)

# Searchlight kernel function
def _sfn(l, mask, myrad, bcast_var):
data = l[0][mask, :].T
# print(l[0].shape, mask.shape, data.shape)
skf = model_selection.StratifiedKFold(n_splits=bcast_var[1],
shuffle=False)
accuracy = np.mean(model_selection.cross_val_score(clf, data,
y=bcast_var[0],
cv=skf,
n_jobs=1))
return accuracy
# obtain a 3D array with accuracy numbers
result_volume = self.sl.run_searchlight(_sfn)
# get result tuple list from the volume
result_list = result_volume[self.mask]
results = []
if rank == 0:
for idx, value in enumerate(result_list):
if value is None:
value = 0
results.append((idx, value))
# Sort the voxels
results.sort(key=lambda tup: tup[1], reverse=True)
logger.info(
'activity-based voxel selection via Searchlight is done'
)
return result_volume, results
16 changes: 1 addition & 15 deletions brainiak/fcma/voxelselector.py
Expand Up @@ -13,21 +13,7 @@
# limitations under the License.
"""Full Correlation Matrix Analysis (FCMA)

This implementation is based on the following publications:

.. [Wang2015-1] Full correlation matrix analysis (FCMA): An unbiased method for
task-related functional connectivity",
Yida Wang, Jonathan D Cohen, Kai Li, Nicholas B Turk-Browne.
Journal of Neuroscience Methods, 2015.

.. [Wang2015-2] "Full correlation matrix analysis of fMRI data on Intel® Xeon
Phi™ coprocessors",
Yida Wang, Michael J. Anderson, Jonathan D. Cohen, Alexander Heinecke,
Kai Li, Nadathur Satish, Narayanan Sundaram, Nicholas B. Turk-Browne,
Theodore L. Willke.
In Proceedings of the International Conference for
High Performance Computing,
Networking, Storage and Analysis. 2015.
Correlation-based voxel selection
"""

# Authors: Yida Wang
Expand Down
4 changes: 4 additions & 0 deletions examples/fcma/download_data.sh
@@ -0,0 +1,4 @@
#!/bin/sh
curl --location -o face_scene.zip https://www.dropbox.com/s/11adrjdkt0w1tr3/face_scene.zip?dl=0
unzip -qo face_scene.zip
rm -f face_scene.zip
56 changes: 55 additions & 1 deletion examples/fcma/file_io.py
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import nibabel as nib
from nibabel.nifti1 import Nifti1Pair
import os
import math
import time
Expand Down Expand Up @@ -126,7 +127,6 @@ def separate_epochs(activity_data, epoch_list):
)
return raw_data, labels


def prepare_data(data_dir, extension, mask_file, epoch_file):
""" read the data in and generate epochs of interests,
then broadcast to all workers
Expand Down Expand Up @@ -178,3 +178,57 @@ def prepare_data(data_dir, extension, mask_file, epoch_file):
(time2 - time1)
)
return raw_data, labels

def generate_epochs_info(epoch_list):
""" use epoch_list to generate epoch_info defined below

Parameters
----------
epoch\_list: list of 3D (binary) array in shape [condition, nEpochs, nTRs]
Contains specification of epochs and conditions,
Assumption: 1. all subjects have the same number of epochs;
2. len(epoch_list) equals the number of subjects;
3. an epoch is always a continuous time course.

Returns
-------
epoch\_info: list of tuple (label, sid, start, end).
label is the condition labels of the epochs;
sid is the subject id, corresponding to the index of raw_data;
start is the start TR of an epoch (inclusive);
end is the end TR of an epoch(exclusive).
Assuming len(labels) labels equals the number of epochs and
the epochs of the same sid are adjacent in epoch_info
"""
time1 = time.time()
epoch_info = []
for sid, epoch in enumerate(epoch_list):
for cond in range(epoch.shape[0]):
sub_epoch = epoch[cond, :, :]
for eid in range(epoch.shape[1]):
r = np.sum(sub_epoch[eid, :])
if r > 0: # there is an epoch in this condition
start = np.nonzero(sub_epoch[eid, :])[0][0]
epoch_info.append((cond, sid, start, start+r))
time2 = time.time()
logger.debug(
'epoch separation done, takes %.2f s' %
(time2 - time1)
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't debug more appropriate for timing messages?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

return epoch_info


def write_nifti_file(data, affine, filename):
""" write a nifti file given data and affine

Parameters
----------
data: 3D/4D numpy array
the brain data with/without time dimension
affine: 2D numpy array
affine of the image, usually inherited from an existing image
filename: string
the output filename
"""
img = Nifti1Pair(data, affine)
nib.nifti1.save(img, filename)