Fcma classification #122

Merged 20 commits on Sep 30, 2016
Commits
550825d
start working on fcma classification
yidawang Sep 27, 2016
abcf911
use gemm to compute kernel matrix for training
yidawang Sep 27, 2016
3ce46f3
prediction is in
yidawang Sep 27, 2016
f676368
use blas to compute similarity matrix, test
yidawang Sep 27, 2016
7e77275
use gemm to compute similarity matrix in prediction
yidawang Sep 27, 2016
4f21899
Merge remote-tracking branch 'upstream/master' into fcma_classification
yidawang Sep 28, 2016
4dac89d
add model dump and load; try logistic regression
yidawang Sep 28, 2016
af6026b
formatting; add test code for fcma classification
yidawang Sep 28, 2016
ab5449f
use Hamming distance to compare the results with the expected results
yidawang Sep 28, 2016
05b8efa
more samples for fcma classification testing
yidawang Sep 28, 2016
927df34
docstrings to cython_blas.pyx
yidawang Sep 28, 2016
932c514
add number of training samples as an element of Classifier of FCMA
yidawang Sep 28, 2016
ee6e36c
typo
yidawang Sep 29, 2016
e6bcf64
Merge remote-tracking branch 'upstream/master' into fcma_classification
yidawang Sep 29, 2016
e910a5a
Merge remote-tracking branch 'upstream/master' into fcma_classification
yidawang Sep 29, 2016
f843f72
docstrings of classifier.py
yidawang Sep 29, 2016
8974ca6
typo
yidawang Sep 29, 2016
a2dd999
impose a pattern to test_classification; address comments of PR reviews
yidawang Sep 29, 2016
c80b884
add Attributes to the class docstring
yidawang Sep 29, 2016
d1581df
use PEP8 name convention; escape for in docstring
yidawang Sep 29, 2016
280 changes: 280 additions & 0 deletions brainiak/fcma/classifier.py
@@ -0,0 +1,280 @@
# Copyright 2016 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Full Correlation Matrix Analysis (FCMA)

This implementation is based on the following publications:

.. [Wang2015] "Full correlation matrix analysis (FCMA): An unbiased method for
task-related functional connectivity",
Yida Wang, Jonathan D Cohen, Kai Li, Nicholas B Turk-Browne.
Journal of Neuroscience Methods, 2015.
"""

# Authors: Yida Wang
# (Intel Labs), 2016

import numpy as np
import time
from sklearn.base import BaseEstimator
import sklearn
from . import fcma_extension
from . import cython_blas as blas
import logging

logger = logging.getLogger(__name__)

__all__ = [
"Classifier",
]


class Classifier(BaseEstimator):
"""Correlation-based classification component of FCMA

Parameters
----------

clf: object
The classifier used, normally an instance of an sklearn classifier
(e.g. sklearn.svm.SVC)

epochs_per_subj: int, default 0
The number of epochs of each subject;
within-subject normalization will be performed during
classifier training if epochs_per_subj is specified,
and the default 0 means no within-subject normalization


Attributes
----------

training_data_: 2D numpy array in shape [num_samples, num_features]
default None
training_data is None unless clf is sklearn.svm.SVC with a precomputed
kernel, in which case the training data is needed to compute
the similarity vector for each sample to be classified

num_voxels_: int
The number of voxels per brain used in this classifier;
this is defined by the applied mask, normally the top voxels
selected by FCMA voxel selection, and
num_voxels must be consistent between training and classification

num_samples_: int
The number of samples of the training set
"""
def __init__(self,
clf,
epochs_per_subj=0):
self.clf = clf
self.epochs_per_subj = epochs_per_subj
return

def fit(self, X, y):
""" use correlation data to train a model

first compute the correlation of the input data,
and then normalize within subject
if more than one sample in one subject,
and then fit to a model defined by self.clf.

Parameters
----------
X: a list of numpy arrays in shape [num_TRs, num_voxels]
X contains the activity data filtered by top voxels
and prepared for correlation computation,
assuming all elements of X have the same num_voxels value
y: labels, len(X) equals len(y)

Returns
-------
self: return the object itself
"""
time1 = time.time()
assert len(X) == len(y), \
'the number of samples does not match the number of labels'
num_samples = len(X)
num_voxels = X[0].shape[1] # see assumption above
self.num_voxels_ = num_voxels
self.num_samples_ = num_samples
corr_data = np.zeros((num_samples, num_voxels, num_voxels),
np.float32, order='C')
# compute correlation
count = 0
for data in X:
num_TRs = data.shape[0]
# syrk performs slower in this case
# blas.compute_single_self_correlation_syrk('L', 'N',
# num_voxels,
# num_TRs,
# 1.0, data,
# num_voxels, 0.0,
# corr_data,
# num_voxels, count)
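# Each sample's [num_voxels, num_voxels] correlation matrix comes from a
# single matrix product (roughly data^T * data); this assumes the activity
# in X has been prepared (e.g. z-scored over time) so that the product
# yields the voxel-by-voxel correlations.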
blas.compute_single_self_correlation_gemm('N', 'T',
num_voxels,
num_voxels,
num_TRs,
1.0, data,
num_voxels, num_voxels,
0.0, corr_data,
num_voxels, count)
count += 1
logger.debug(
'correlation computation done'
)
# normalize if necessary
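# (fcma_extension.normalization is assumed here to z-score, per subject,
# each correlation value across that subject's epochs_per_subj epochs,
# possibly after a Fisher transform; see fcma_extension for the details)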
if self.epochs_per_subj > 0:
corr_data = corr_data.reshape(1,
num_samples,
num_voxels * num_voxels)
fcma_extension.normalization(corr_data, self.epochs_per_subj)
corr_data = corr_data.reshape(num_samples, num_voxels, num_voxels)
logger.debug(
'normalization done'
)
# training
if isinstance(self.clf, sklearn.svm.SVC) \
and self.clf.kernel == 'precomputed':
kernel_matrix = np.zeros((num_samples, num_samples),
np.float32,
order='C')
# for using kernel matrix computation from voxel selection
corr_data = corr_data.reshape(1,
num_samples,
num_voxels * num_voxels)
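# kernel_matrix is presumably the linear kernel over the flattened
# correlation patterns, i.e. kernel_matrix[i, j] = <corr_i, corr_j>,
# matching the kernel matrix used during voxel selection.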
blas.compute_kernel_matrix('L', 'T',
num_samples, num_voxels * num_voxels,
1.0, corr_data,
0, num_voxels * num_voxels,
0.0, kernel_matrix, num_samples)
data = kernel_matrix
# training data is in shape [num_samples, num_voxels * num_voxels]
self.training_data_ = corr_data.reshape(num_samples,
num_voxels * num_voxels)
logger.debug(
'kernel computation done'
)
else:
data = corr_data.reshape(num_samples, num_voxels * num_voxels)
self.training_data_ = None

self.clf = self.clf.fit(data, y)
time2 = time.time()
logger.info(
'training done, takes %.2f s' %
(time2 - time1)
)
return self

def predict(self, X):
""" use a trained model to predict correlation data

first compute the correlation of the input data,
and then normalize across all samples in the list
if len(X) > 1,
and then predict via self.clf.

Parameters
----------
X: a list of numpy arrays in shape [num_TRs, self.num_voxels\_]
X contains the activity data filtered by top voxels
and prepared for correlation computation.
len(X) is the number of test samples;
if len(X) > 1, normalization is done
across all test samples

Returns
-------
y_pred: the predicted labels of X, in shape [len(X),]
"""
time1 = time.time()
num_test_samples = len(X)
assert num_test_samples > 0, \
'at least one sample is needed'
corr_data = np.zeros((num_test_samples,
self.num_voxels_,
self.num_voxels_),
np.float32,
order='C')
# compute correlation
count = 0
for data in X:
num_TRs = data.shape[0]
num_voxels = data.shape[1]
assert self.num_voxels_ == num_voxels, \
'the number of voxels provided by X does not match ' \
'the number of voxels defined in the model'
blas.compute_single_self_correlation_gemm('N', 'T',
num_voxels,
num_voxels,
num_TRs,
1.0, data,
num_voxels, num_voxels,
0.0, corr_data,
num_voxels, count)
count += 1
logger.debug(
'correlation computation done'
)
# normalize if necessary
if num_test_samples > 1:
corr_data = corr_data.reshape(1,
num_test_samples,
num_voxels * num_voxels)
fcma_extension.normalization(corr_data,
num_test_samples)
corr_data = corr_data.reshape(num_test_samples,
num_voxels,
num_voxels)
logger.debug(
'normalization done'
)
# predict
if isinstance(self.clf, sklearn.svm.SVC) \
and self.clf.kernel == 'precomputed':
assert self.training_data_ is not None, \
'when using an SVM with a precomputed kernel, ' \
'all training data must be provided'
num_training_samples = self.training_data_.shape[0]
data = np.zeros((num_test_samples, num_training_samples),
np.float32,
order='C')
corr_data = corr_data.reshape(num_test_samples,
num_voxels * num_voxels)
# compute the similarity matrix using corr_data and training_data
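# data[i, j] is then presumably <corr_i, training_corr_j>, the same linear
# kernel as in fit, which is the layout SVC.predict expects when
# kernel='precomputed' (shape [num_test_samples, num_training_samples]).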
blas.compute_single_matrix_multiplication('T', 'N',
num_training_samples,
num_test_samples,
num_voxels * num_voxels,
1.0,
self.training_data_,
num_voxels * num_voxels,
corr_data,
num_voxels * num_voxels,
0.0,
data,
num_training_samples)
logger.debug(
'similarity matrix computation done'
)
else:
data = corr_data.reshape(num_test_samples,
num_voxels * num_voxels)
y_pred = self.clf.predict(data)
time2 = time.time()
logger.info(
'prediction done, takes %.2f s' %
(time2 - time1)
)
return y_pred