Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to eventseg #266

Merged
merged 4 commits into from Sep 20, 2017
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
65 changes: 44 additions & 21 deletions brainiak/eventseg/event.py
Expand Up @@ -33,6 +33,7 @@
import copy
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_is_fitted, check_array
from sklearn.exceptions import NotFittedError

from . import _utils as utils # type: ignore

Expand Down Expand Up @@ -86,8 +87,25 @@ def __init__(self, n_events=2,
step_var=_default_var_schedule,
n_iter=500):
self.n_events = n_events
self.classes_ = np.arange(self.n_events)
self.step_var = step_var
self.n_iter = n_iter
self.event_var_ = None

# Set up transition matrix, with final sink state
# For transition matrix of this form, the transition probability has
# no impact on the final solution, since all valid paths must take
# the same number of transitions
self.p_start = np.zeros((1, self.n_events + 1))
self.p_start[0, 0] = 1
self.P = np.vstack((np.hstack((
0.5 * np.diag(np.ones(self.n_events))
+ 0.5 * np.diag(np.ones(self.n_events - 1), 1),
np.append(np.zeros((self.n_events - 1, 1)), [[0.5]], axis=0))),
np.append(np.zeros((1, self.n_events)), [[1]],
axis=1)))
self.p_end = np.zeros((1, self.n_events + 1))
self.p_end[0, -2] = 1

def fit(self, X, y=None):
"""Learn a segmentation on training data
Expand Down Expand Up @@ -126,22 +144,6 @@ def fit(self, X, y=None):
for i in range(n_train):
X[i] = stats.zscore(X[i], axis=1, ddof=1)

# Set up transition matrix, with final sink state
# For transition matrix of this form, the transition probability has
# no impact on the final solution, since all valid paths must take
# the same number of transitions
self.p_start = np.zeros((1, self.n_events + 1))
self.p_start[0, 0] = 1
self.P = np.vstack((np.hstack((
0.5 * np.diag(np.ones(self.n_events))
+ 0.5 * np.diag(np.ones(self.n_events - 1), 1),
np.append(np.zeros((self.n_events - 1, 1)), [[0.5]], axis=0))),
np.append(np.zeros((1, self.n_events)), [[1]], axis=1)))
self.p_end = np.zeros((1, self.n_events + 1))
self.p_end[0, -2] = 1

self.classes_ = np.arange(self.n_events)

# Initialize variables for fitting
log_gamma = []
for i in range(n_train):
Expand Down Expand Up @@ -313,12 +315,28 @@ def _log(self, x):
y = utils.masked_log(_x)
return y.reshape(xshape)

def set_event_patterns(self, event_pat):
    """Set HMM event patterns manually

    Rather than fitting the event patterns automatically using fit(), this
    function allows them to be set explicitly. They can then be used to
    find corresponding events in a new dataset, using find_events().

    Parameters
    ----------
    event_pat: voxel by event ndarray
        Must be two-dimensional, with one column per event
        (event_pat.shape[1] == n_events). A copy is stored, so later
        changes to the caller's array do not affect this object.

    Raises
    ------
    ValueError
        If event_pat is not two-dimensional, or if its number of columns
        differs from n_events.
    """
    # Check dimensionality first: reading shape[1] of a 1D array would
    # otherwise fail with an unhelpful IndexError, and a higher-rank array
    # could slip through the column-count check below.
    if event_pat.ndim != 2:
        raise ValueError(("event_pat must be a 2D voxel by event "
                          "ndarray"))
    if event_pat.shape[1] != self.n_events:
        raise ValueError(("Number of columns of event_pat must match "
                          "number of events"))
    self.event_pat_ = event_pat.copy()

def find_events(self, testing_data, var=None, scramble=False):
"""Applies learned event segmentation to new testing dataset

After fitting an event segmentation using fit(),
this function finds the same sequence of event patterns in a new
testing dataset.
After fitting an event segmentation using fit() or setting event
patterns directly using set_event_patterns(), this function finds the
same sequence of event patterns in a new testing dataset.

Parameters
----------
Expand All @@ -328,7 +346,8 @@ def find_events(self, testing_data, var=None, scramble=False):
var: float or 1D ndarray of length equal to the number of events
default: uses variance that maximized training log-likelihood
Variance of the event Gaussians. If scalar, all events are
assumed to have the same variance.
assumed to have the same variance. If fit() has not previously
been run, this must be specified (cannot be None).

scramble: bool : default False
If true, the order of the learned events are shuffled before
Expand All @@ -345,7 +364,11 @@ def find_events(self, testing_data, var=None, scramble=False):
"""

if var is None:
var = self.event_var_
if self.event_var_ is None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test should check the existence of the attribute, not whether it is None. The documentation is not perfectly clear, but the code is:

the existence of parameters with trailing _ is used to check if the estimator has been fitted.
http://scikit-learn.org/stable/developers/contributing.html#parameters-and-init

    if not all_or_any([hasattr(estimator, attr) for attr in attributes]):
        raise NotFittedError(msg % {'name': type(estimator).__name__})

https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/validation.py#L793

raise NotFittedError(("The event patterns must first be set "
"by fit() or set_event_patterns()"))
else:
var = self.event_var_

if scramble:
mean_pat = self.event_pat_[:, np.random.permutation(self.n_events)]
Expand Down
1 change: 1 addition & 0 deletions docs/newsfragments/266.feature
@@ -0,0 +1 @@
eventseg: add set_event_patterns() and notebook example
831 changes: 831 additions & 0 deletions examples/eventseg/HiddenMarkovModels.ipynb

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions examples/eventseg/requirements.txt
@@ -1 +1,6 @@
matplotlib
notebook
sklearn
deepdish
scipy

2 changes: 1 addition & 1 deletion examples/isfc/download_data.sh
@@ -1,4 +1,4 @@
#!/bin/sh
curl --location -o isfc.zip https://api.onedrive.com/v1.0/shares/s!Aobi2ryypFQCgqQOm2Zhs-TSLu9leQ/root/content
wget https://ndownloader.figshare.com/files/9342556 -O isfc.zip
unzip -qo isfc.zip
rm -f isfc.zip
11 changes: 11 additions & 0 deletions tests/eventseg/test_event.py
Expand Up @@ -38,3 +38,14 @@ def test_simple_boundary():
events = np.argmax(es.segments_[0], axis=1)
assert np.array_equal(events, [0, 0, 0, 1, 1, 1, 1]),\
"Failed to correctly segment two events"


def test_event_transfer():
    """Event patterns set by hand should segment new data without fit()."""
    # Voxel by event patterns: each of the two events drives one voxel.
    patterns = np.asarray([[1, 0], [0, 1]])
    model = brainiak.eventseg.event.EventSegment(2)
    model.set_event_patterns(patterns)

    # First three timepoints match event 0, the remaining four event 1.
    voxel_by_time = np.asarray([[1, 1, 1, 0, 0, 0, 0],
                                [0, 0, 0, 1, 1, 1, 1]])
    event_var = np.asarray([1, 1])
    seg = model.find_events(voxel_by_time.T, event_var)[0]

    events = np.argmax(seg, axis=1)
    assert np.array_equal(events, [0, 0, 0, 1, 1, 1, 1]),\
        "Failed to correctly transfer two events to new data"