Skip to content

Commit ba44a42

Browse files
committed
Integration of the function for simulated spatio-temporal ground-truths
1 parent 51bc90c commit ba44a42

File tree

5 files changed

+462
-6
lines changed

5 files changed

+462
-6
lines changed

docs/source/api/api_simulations.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Single and multi-subjects gaussian-based simulations
2424
.. autosummary::
2525
:toctree: generated/
2626

27+
sim_ground_truth
2728
sim_local_cc_ss
2829
sim_local_cc_ms
2930
sim_local_cd_ss
Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
"""
2+
Generate spatio-temporal ground-truths
3+
======================================
4+
5+
Frites provides some functions to generate spatio-temporal ground-truths (i.e.
6+
effects that are distributed across time and space with a predefined profile).
7+
Those ground-truths can be particularly interesting to simulate the data coming
8+
from multiple subjects, compare group-level strategies such as methods for
9+
multiple comparisons.
10+
11+
This example illustrates :
12+
13+
* How the predefined ground-truth effects looks like
14+
* How to generate the data coming from multiple subjects
15+
* How to use the workflow of mutual-information to retrieve the effect
16+
* How to use statistical measures to compare the effect detected as
17+
significant and the ground-truth
18+
"""
19+
import numpy as np
20+
import xarray as xr
21+
22+
from frites.simulations import sim_ground_truth
23+
from frites.dataset import DatasetEphy
24+
from frites.workflow import WfMi
25+
26+
import matplotlib.pyplot as plt
27+
import matplotlib as mpl
28+
29+
plt.style.use('ggplot')
30+
31+
32+
###############################################################################
33+
# Comparison of the implemented ground truth
34+
# ------------------------------------------
35+
#
36+
# In this first part we illustrate the implemented ground-truths profiles. This
37+
# includes effects with varying covariance over time and space, weak and
38+
# diffuse effects and strong and focal effect.
39+
40+
gtypes = ['tri', 'tri_r', 'diffuse', 'focal']
41+
n_subjects = 1
42+
n_epochs = 5
43+
44+
gts = {}
45+
for gtype in gtypes:
46+
gts[gtype] = sim_ground_truth(n_subjects, n_epochs, gtype=gtype,
47+
gt_only=True, gt_as_cov=True)
48+
49+
# plot the four ground-truths
50+
fig, axs = plt.subplots(
51+
ncols=len(gtypes), sharex=False, sharey=False, figsize=(18, 4.5),
52+
gridspec_kw=dict(wspace=.2, left=0.05, right=0.95)
53+
)
54+
axs = np.ravel(axs)
55+
56+
for n_g, g in enumerate(gtypes):
57+
plt.sca(axs[n_g])
58+
df = gts[g].to_pandas().T
59+
plt.pcolormesh(df.columns, df.index, df.values, cmap='Spectral_r',
60+
shading='nearest', vmin=0., vmax=0.3)
61+
plt.title(g.capitalize(), fontweight='bold', fontsize=15)
62+
plt.grid(True)
63+
plt.xlabel('Time (bin)')
64+
if n_g == 0: plt.ylabel('Spatial (bin)')
65+
plt.colorbar()
66+
67+
plt.show()
68+
69+
###############################################################################
70+
# .. note::
71+
# Here, the ground-truth contains values of covariance at specific time
72+
# and spatial bins. The covariance reflects where there's a relation
73+
# between the brain data and the external continuous y variable and how
74+
# strong is this relation.
75+
76+
77+
###############################################################################
78+
# Data simulation
79+
# ---------------
80+
#
81+
# In this second part, we use the same function to simulate the data coming
82+
# from multiple subjects. The returned data are a list of length n_subjects
83+
# where each element of the list is an array (xarray.DataArray) of shape
84+
# (n_epochs, n_roi, n_times). In addition to the simulated data, the external
85+
# continuous variable is set as a coordinate of the trial dimension ('y')
86+
87+
gtype = 'tri' # ground truth type
88+
n_subjects = 10 # number of simulated subjects
89+
n_epochs = 100 # number of trials per subject
90+
91+
# generate the data for all of the subjects
92+
da, gt = sim_ground_truth(n_subjects, n_epochs, gtype=gtype, random_state=42)
93+
94+
# get data (min, max) for plotting
95+
vmin, vmax = [], []
96+
for d in da:
97+
d = d.mean('y')
98+
vmin.append(np.percentile(d.data, 5))
99+
vmax.append(np.percentile(d.data, 95))
100+
vmin, vmax = np.min(vmin), np.max(vmax)
101+
102+
# plot the four ground-truths
103+
nrows = 2
104+
ncols = int(np.round(n_subjects / nrows))
105+
width = int(np.round(4 * ncols))
106+
height = int(np.round(4 * nrows))
107+
108+
fig, axs = plt.subplots(
109+
ncols=ncols, nrows=nrows, sharex=True, sharey=True,
110+
figsize=(width, height), gridspec_kw=dict(wspace=.1, left=0.05, right=0.95)
111+
)
112+
axs = np.ravel(axs)
113+
114+
for n_s in range(n_subjects):
115+
# subject selection and mean across trials
116+
df = da[n_s].mean('y').to_pandas()
117+
118+
plt.sca(axs[n_s])
119+
plt.pcolormesh(df.columns, df.index, df.values, cmap='Spectral_r',
120+
shading='nearest', vmin=vmin, vmax=vmax)
121+
plt.title(f"Subject #{n_s}", fontweight='bold', fontsize=15)
122+
plt.grid(True)
123+
plt.xlabel('Time (bin)')
124+
plt.ylabel('Spatial (bin)')
125+
plt.colorbar()
126+
127+
plt.show()
128+
129+
130+
###############################################################################
131+
# Compute the effect size and group-level statistics
132+
# --------------------------------------------------
133+
#
134+
# In this third part, we compute the mutual-information and the statistics at
135+
# the group-level using the data simulated above. To this end, we first define
136+
# a dataset containing the electrophysiological. In the simulated data, the
137+
# spatial dimension is called 'roi', the temporal dimension is called 'times'
138+
# and the external continuous y variable is attached along the trial dimension
139+
# and is called 'y'. After that, we use a random-effect model for the
140+
# population with p-values corrected for multiple comparisons using a cluster-
141+
# based approach.
142+
143+
# define a dataset hosting the data coming from multiple subjects
144+
dt = DatasetEphy(da, y='y', roi='roi', times='times')
145+
146+
# run the statistics at the group-level
147+
wf = WfMi(mi_type='cc', inference='rfx')
148+
mi, pv = wf.fit(dt, n_perm=200, mcp='cluster')
149+
150+
# get the t-values
151+
tv = wf.tvalues
152+
153+
# xarray to dataframe conversion
154+
df_tv = tv.to_pandas().T
155+
df_pv = (pv < 0.05).to_pandas().T
156+
df_gt = gt.astype(int).to_pandas().T
157+
158+
# sphinx_gallery_thumbnail_number = 3
159+
# plot the results
160+
fig, axs = plt.subplots(
161+
ncols=3, sharex=True, sharey=True,
162+
figsize=(16, 4.5), gridspec_kw=dict(wspace=.1, left=0.05, right=0.95)
163+
)
164+
axs = np.ravel(axs)
165+
166+
kw_title = dict(fontweight='bold', fontsize=15)
167+
kw_heatmap = dict(shading='nearest')
168+
169+
plt.sca(axs[0])
170+
plt.pcolormesh(df_tv.columns, df_tv.index, df_tv.values, cmap='viridis',
171+
vmin=0., vmax=np.percentile(df_tv.values, 99), **kw_heatmap)
172+
plt.colorbar()
173+
plt.title(f"Effect-size at the group-level\n(t-values)", **kw_title)
174+
175+
plt.sca(axs[1])
176+
plt.pcolormesh(df_pv.columns, df_pv.index, df_pv.values, cmap='RdBu_r',
177+
vmin=0, vmax=1, **kw_heatmap)
178+
plt.colorbar()
179+
plt.title(f"Effects detected as significant\nat the group-level (p<0.05)",
180+
**kw_title)
181+
182+
plt.sca(axs[2])
183+
plt.pcolormesh(df_gt.columns, df_gt.index, df_gt.values, cmap='RdBu_r',
184+
vmin=0, vmax=1, **kw_heatmap)
185+
plt.colorbar()
186+
plt.title(f"Ground-truth\n(gtype={gtype})", **kw_title)
187+
188+
plt.show()
189+
190+
###############################################################################
191+
# Comparison between the effect detected as significant and the ground-truth
192+
# --------------------------------------------------------------------------
193+
#
194+
# This last section quantifies how well the statistical framework performed
195+
# on this particular ground-truth. The overall idea is to use statistical
196+
# measures (namely false / true positive / negative rates) to quantify how well
197+
# the framework of group-level statistics is able to retrieve the ground-truth.
198+
199+
# bins with / without effect in the ground-truth
200+
tp_gt, tn_gt = (df_gt.values == 1), (df_gt.values == 0)
201+
dim_gt = np.prod(df_gt.values.shape)
202+
203+
print(
204+
"\n"
205+
"Ground-Truth\n"
206+
"------------\n"
207+
f"- Total number of spatio-temporal bins : {dim_gt}\n"
208+
f"- Number of bins with an effect : {tp_gt.sum()}\n"
209+
f"- Number of bins without effect : {tn_gt.sum()}\n"
210+
)
211+
212+
# bins with / without in the retrieved effect
213+
tp_pv, tn_pv = (df_pv.values == 1), (df_pv.values == 0)
214+
dim_pv = np.prod(df_pv.values.shape)
215+
216+
print(
217+
"Bins detected as significant\n"
218+
"----------------------------\n"
219+
f"- Total number of spatio-temporal bins : {dim_pv}\n"
220+
f"- Number of bins with an effect : {tp_pv.sum()}\n"
221+
f"- Number of bins without effect : {tn_pv.sum()}\n"
222+
)
223+
224+
# comparison between the ground-truth
225+
tp = np.logical_and(tp_pv, tp_gt).sum()
226+
tn = np.logical_and(tn_pv, tn_gt).sum()
227+
fp = np.logical_and(tp_pv, tn_gt).sum()
228+
fn = np.logical_and(tn_pv, tp_gt).sum()
229+
230+
print(
231+
"Comparison between the ground-truth and the retrieved effect\n"
232+
"------------------------------------------------------------\n"
233+
f"- Number of true positive : {tp}\n"
234+
f"- Number of true negative : {tn}\n"
235+
f"- Number of false positive : {fp}\n"
236+
f"- Number of false negative : {fn}\n"
237+
)
238+
239+
# Type I error rate (false positive)
240+
p_fp = fp / (fp + tn) # == fp / n_false
241+
# Type II error rate (false negative)
242+
p_fn = fn / (fn + tp) # == fn / n_true
243+
# Sensitivity (true positive rate)
244+
sen = tp / (tp + fn) # == 1. - p_fn == tp / n_true
245+
# Specificity (true negative rate)
246+
spe = tn / (tn + fp) # == 1. - p_fp == tn / n_false
247+
# Matthews Correlation Coefficient
248+
numer = np.array(tp * tn - fp * fn)
249+
denom = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
250+
mcc = numer / denom
251+
252+
print(
253+
f"Statistics\n"
254+
f"----------\n"
255+
f"- Type I error (false positive rate): {p_fp}\n"
256+
f"- Type II error (false negative rate): {p_fn}\n"
257+
f"- Sensitivity (true positive rate): {sen}\n"
258+
f"- Specificity (true negative rate): {spe}\n"
259+
f"- Matthews Correlation Coefficient: {mcc}"
260+
)

frites/simulations/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@
1515
# task-related brain network
1616
from .sim_ar import StimSpecAR # noqa
1717
# gaussian-based task-related functions
18-
from .sim_local_mi import (sim_local_cc_ss, sim_local_cc_ms, # noqa
19-
sim_local_cd_ss, sim_local_cd_ms,
20-
sim_local_ccd_ms, sim_local_ccd_ss)
18+
from .sim_local_mi import (
19+
sim_local_cc_ss, sim_local_cc_ms, sim_local_cd_ss, sim_local_cd_ms, # noqa
20+
sim_local_ccd_ms, sim_local_ccd_ss, sim_ground_truth
21+
)
2122

2223
# Undocumented functions
2324
# ----------------------

0 commit comments

Comments
 (0)