Skip to content

Commit

Permalink
EXA: data-partitioning
Browse files Browse the repository at this point in the history
  • Loading branch information
christianbrodbeck committed Jul 25, 2021
1 parent b19221b commit 9de1533
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def section_order(self):
'deconvolution': [
'trf_intro.py',
'mtrf.py',
'partitions.py',
'epoch_impulse.py',
],
})
Expand Down
5 changes: 4 additions & 1 deletion eelbrain/plot/_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Union

from matplotlib.patches import Rectangle
from matplotlib.ticker import MaxNLocator
import numpy

from .._trf.shared import Splits, split_data
Expand Down Expand Up @@ -45,7 +46,8 @@ def __init__(
if colors is None:
colors = colors_for_oneway(attrs, unambiguous=[6, 3, 5])

layout = Layout(1, 16/9, 2, **kwargs)
h_default = max(2, 0.5 + 0.15 * len(splits.splits))
layout = Layout(1, 16/9, h_default, **kwargs)
EelFigure.__init__(self, None, layout)
ax = self.figure.axes[0]

Expand All @@ -68,6 +70,7 @@ def __init__(
labels = {key: labels[key] for key in handles}
ax.set_ylabel('Split')
ax.set_ylim(-0.5, len(splits.splits)-0.5)
ax.yaxis.set_major_locator(MaxNLocator(integer=True))
ax.set_xlabel('Sample')
ax.set_xlim(splits.segments[0, 0], splits.segments[-1, 1])
LegendMixin.__init__(self, legend, handles, labels)
Expand Down
53 changes: 53 additions & 0 deletions examples/deconvolution/partitions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: sphinx
#       format_version: '1.1'
#     jupytext_version: 1.11.3
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
# ---

"""
.. _exa-data_split:
.. currentmodule:: eelbrain

Data partitions for boosting
============================
The boosting algorithm can use two different forms of cross-validation: cross-validation as stopping criterion (always on), and cross-validation for model evaluation (optional). This requires partitioning the data into different segments. The :func:`eelbrain.plot.preview_partitions` function is for exploring the effect of different parameters on the way the data is split.

Validation
==========
During boosting, every training step consists in modifying one element of the kernel/TRF. After every such step, the new TRF is evaluated against the validation data. For continuous data (without :class:`Case` dimension), the default is to split the data into 10 equal-length segments, and perform 10 model fits, each using one of the segments as validation set. In the plots below, each "Split" shown on the y-axis corresponds to a separate run of the boosting algorithm. The results returned by the :func:`boosting` function would be based on the average TRF of those 10 runs.
"""
# sphinx_gallery_thumbnail_number = 6
from eelbrain import *


p = plot.preview_partitions()

###############################################################################
# The number of partitions can be controlled with the `partitions` parameter:

p5 = plot.preview_partitions(partitions=5)
p7 = plot.preview_partitions(partitions=7)  # renamed from `p2`: it previews 7 partitions, not 2

###############################################################################
# For data with multiple trials (data with a :class:`Case` dimension), the function attempts to use trials evenly across time:

p = plot.preview_partitions(20, partitions=5)
p = plot.preview_partitions(20, partitions=2)

###############################################################################
# Testing
# -------
# Testing the result of a model fit with cross-validation requires data that was never used during training. Testing with cross-validation is enabled in the :func:`boosting` function by setting ``test=True``. When testing is enabled, each data segment is used in turn as testing segment. For each testing segment, the remaining segments are used in different runs as training and validation data. The results of those runs are then averaged to predict responses in the testing data. This nested loop means that the number of boosting runs can get large quickly when using many partitions, so the default is to use just four partitions:

p = plot.preview_partitions(test=True)

""
# use `test=True` (not `1`) for consistency with the call above
p = plot.preview_partitions(20, partitions=5, test=True)

0 comments on commit 9de1533

Please sign in to comment.