|
| 1 | +""" |
| 2 | +Generate spatio-temporal ground-truths |
| 3 | +====================================== |
| 4 | +
|
| 5 | +Frites provides some functions to generate spatio-temporal ground-truths (i.e. |
| 6 | +effects that are distributed across time and space with a predefined profile). |
| 7 | +Those ground-truths can be particularly interesting to simulate the data coming |
| 8 | +from multiple subjects, compare group-level strategies such as methods for |
| 9 | +multiple comparisons. |
| 10 | +
|
| 11 | +This example illustrates : |
| 12 | +
|
| 13 | + * How the predefined ground-truth effects looks like |
| 14 | + * How to generate the data coming from multiple subjects |
| 15 | + * How to use the workflow of mutual-information to retrieve the effect |
| 16 | + * How to use statistical measures to compare the effect detected as |
| 17 | + significant and the ground-truth |
| 18 | +""" |
| 19 | +import numpy as np |
| 20 | +import xarray as xr |
| 21 | + |
| 22 | +from frites.simulations import sim_ground_truth |
| 23 | +from frites.dataset import DatasetEphy |
| 24 | +from frites.workflow import WfMi |
| 25 | + |
| 26 | +import matplotlib.pyplot as plt |
| 27 | +import matplotlib as mpl |
| 28 | + |
| 29 | +plt.style.use('ggplot') |
| 30 | + |
| 31 | + |
| 32 | +############################################################################### |
| 33 | +# Comparison of the implemented ground truth |
| 34 | +# ------------------------------------------ |
| 35 | +# |
| 36 | +# In this first part we illustrate the implemented ground-truths profiles. This |
| 37 | +# includes effects with varying covariance over time and space, weak and |
| 38 | +# diffuse effects and strong and focal effect. |
| 39 | + |
| 40 | +gtypes = ['tri', 'tri_r', 'diffuse', 'focal'] |
| 41 | +n_subjects = 1 |
| 42 | +n_epochs = 5 |
| 43 | + |
| 44 | +gts = {} |
| 45 | +for gtype in gtypes: |
| 46 | + gts[gtype] = sim_ground_truth(n_subjects, n_epochs, gtype=gtype, |
| 47 | + gt_only=True, gt_as_cov=True) |
| 48 | + |
| 49 | +# plot the four ground-truths |
| 50 | +fig, axs = plt.subplots( |
| 51 | + ncols=len(gtypes), sharex=False, sharey=False, figsize=(18, 4.5), |
| 52 | + gridspec_kw=dict(wspace=.2, left=0.05, right=0.95) |
| 53 | +) |
| 54 | +axs = np.ravel(axs) |
| 55 | + |
| 56 | +for n_g, g in enumerate(gtypes): |
| 57 | + plt.sca(axs[n_g]) |
| 58 | + df = gts[g].to_pandas().T |
| 59 | + plt.pcolormesh(df.columns, df.index, df.values, cmap='Spectral_r', |
| 60 | + shading='nearest', vmin=0., vmax=0.3) |
| 61 | + plt.title(g.capitalize(), fontweight='bold', fontsize=15) |
| 62 | + plt.grid(True) |
| 63 | + plt.xlabel('Time (bin)') |
| 64 | + if n_g == 0: plt.ylabel('Spatial (bin)') |
| 65 | + plt.colorbar() |
| 66 | + |
| 67 | +plt.show() |
| 68 | + |
| 69 | +############################################################################### |
| 70 | +# .. note:: |
| 71 | +# Here, the ground-truth contains values of covariance at specific time |
| 72 | +# and spatial bins. The covariance reflects where there's a relation |
| 73 | +# between the brain data and the external continuous y variable and how |
| 74 | +# strong is this relation. |
| 75 | + |
| 76 | + |
| 77 | +############################################################################### |
| 78 | +# Data simulation |
| 79 | +# --------------- |
| 80 | +# |
| 81 | +# In this second part, we use the same function to simulate the data coming |
| 82 | +# from multiple subjects. The returned data are a list of length n_subjects |
| 83 | +# where each element of the list is an array (xarray.DataArray) of shape |
| 84 | +# (n_epochs, n_roi, n_times). In addition to the simulated data, the external |
| 85 | +# continuous variable is set as a coordinate of the trial dimension ('y') |
| 86 | + |
| 87 | +gtype = 'tri' # ground truth type |
| 88 | +n_subjects = 10 # number of simulated subjects |
| 89 | +n_epochs = 100 # number of trials per subject |
| 90 | + |
| 91 | +# generate the data for all of the subjects |
| 92 | +da, gt = sim_ground_truth(n_subjects, n_epochs, gtype=gtype, random_state=42) |
| 93 | + |
| 94 | +# get data (min, max) for plotting |
| 95 | +vmin, vmax = [], [] |
| 96 | +for d in da: |
| 97 | + d = d.mean('y') |
| 98 | + vmin.append(np.percentile(d.data, 5)) |
| 99 | + vmax.append(np.percentile(d.data, 95)) |
| 100 | +vmin, vmax = np.min(vmin), np.max(vmax) |
| 101 | + |
| 102 | +# plot the four ground-truths |
| 103 | +nrows = 2 |
| 104 | +ncols = int(np.round(n_subjects / nrows)) |
| 105 | +width = int(np.round(4 * ncols)) |
| 106 | +height = int(np.round(4 * nrows)) |
| 107 | + |
| 108 | +fig, axs = plt.subplots( |
| 109 | + ncols=ncols, nrows=nrows, sharex=True, sharey=True, |
| 110 | + figsize=(width, height), gridspec_kw=dict(wspace=.1, left=0.05, right=0.95) |
| 111 | +) |
| 112 | +axs = np.ravel(axs) |
| 113 | + |
| 114 | +for n_s in range(n_subjects): |
| 115 | + # subject selection and mean across trials |
| 116 | + df = da[n_s].mean('y').to_pandas() |
| 117 | + |
| 118 | + plt.sca(axs[n_s]) |
| 119 | + plt.pcolormesh(df.columns, df.index, df.values, cmap='Spectral_r', |
| 120 | + shading='nearest', vmin=vmin, vmax=vmax) |
| 121 | + plt.title(f"Subject #{n_s}", fontweight='bold', fontsize=15) |
| 122 | + plt.grid(True) |
| 123 | + plt.xlabel('Time (bin)') |
| 124 | + plt.ylabel('Spatial (bin)') |
| 125 | + plt.colorbar() |
| 126 | + |
| 127 | +plt.show() |
| 128 | + |
| 129 | + |
| 130 | +############################################################################### |
| 131 | +# Compute the effect size and group-level statistics |
| 132 | +# -------------------------------------------------- |
| 133 | +# |
| 134 | +# In this third part, we compute the mutual-information and the statistics at |
| 135 | +# the group-level using the data simulated above. To this end, we first define |
| 136 | +# a dataset containing the electrophysiological. In the simulated data, the |
| 137 | +# spatial dimension is called 'roi', the temporal dimension is called 'times' |
| 138 | +# and the external continuous y variable is attached along the trial dimension |
| 139 | +# and is called 'y'. After that, we use a random-effect model for the |
| 140 | +# population with p-values corrected for multiple comparisons using a cluster- |
| 141 | +# based approach. |
| 142 | + |
| 143 | +# define a dataset hosting the data coming from multiple subjects |
| 144 | +dt = DatasetEphy(da, y='y', roi='roi', times='times') |
| 145 | + |
| 146 | +# run the statistics at the group-level |
| 147 | +wf = WfMi(mi_type='cc', inference='rfx') |
| 148 | +mi, pv = wf.fit(dt, n_perm=200, mcp='cluster') |
| 149 | + |
| 150 | +# get the t-values |
| 151 | +tv = wf.tvalues |
| 152 | + |
| 153 | +# xarray to dataframe conversion |
| 154 | +df_tv = tv.to_pandas().T |
| 155 | +df_pv = (pv < 0.05).to_pandas().T |
| 156 | +df_gt = gt.astype(int).to_pandas().T |
| 157 | + |
| 158 | +# sphinx_gallery_thumbnail_number = 3 |
| 159 | +# plot the results |
| 160 | +fig, axs = plt.subplots( |
| 161 | + ncols=3, sharex=True, sharey=True, |
| 162 | + figsize=(16, 4.5), gridspec_kw=dict(wspace=.1, left=0.05, right=0.95) |
| 163 | +) |
| 164 | +axs = np.ravel(axs) |
| 165 | + |
| 166 | +kw_title = dict(fontweight='bold', fontsize=15) |
| 167 | +kw_heatmap = dict(shading='nearest') |
| 168 | + |
| 169 | +plt.sca(axs[0]) |
| 170 | +plt.pcolormesh(df_tv.columns, df_tv.index, df_tv.values, cmap='viridis', |
| 171 | + vmin=0., vmax=np.percentile(df_tv.values, 99), **kw_heatmap) |
| 172 | +plt.colorbar() |
| 173 | +plt.title(f"Effect-size at the group-level\n(t-values)", **kw_title) |
| 174 | + |
| 175 | +plt.sca(axs[1]) |
| 176 | +plt.pcolormesh(df_pv.columns, df_pv.index, df_pv.values, cmap='RdBu_r', |
| 177 | + vmin=0, vmax=1, **kw_heatmap) |
| 178 | +plt.colorbar() |
| 179 | +plt.title(f"Effects detected as significant\nat the group-level (p<0.05)", |
| 180 | + **kw_title) |
| 181 | + |
| 182 | +plt.sca(axs[2]) |
| 183 | +plt.pcolormesh(df_gt.columns, df_gt.index, df_gt.values, cmap='RdBu_r', |
| 184 | + vmin=0, vmax=1, **kw_heatmap) |
| 185 | +plt.colorbar() |
| 186 | +plt.title(f"Ground-truth\n(gtype={gtype})", **kw_title) |
| 187 | + |
| 188 | +plt.show() |
| 189 | + |
| 190 | +############################################################################### |
| 191 | +# Comparison between the effect detected as significant and the ground-truth |
| 192 | +# -------------------------------------------------------------------------- |
| 193 | +# |
| 194 | +# This last section quantifies how well the statistical framework performed |
| 195 | +# on this particular ground-truth. The overall idea is to use statistical |
| 196 | +# measures (namely false / true positive / negative rates) to quantify how well |
| 197 | +# the framework of group-level statistics is able to retrieve the ground-truth. |
| 198 | + |
| 199 | +# bins with / without effect in the ground-truth |
| 200 | +tp_gt, tn_gt = (df_gt.values == 1), (df_gt.values == 0) |
| 201 | +dim_gt = np.prod(df_gt.values.shape) |
| 202 | + |
| 203 | +print( |
| 204 | + "\n" |
| 205 | + "Ground-Truth\n" |
| 206 | + "------------\n" |
| 207 | + f"- Total number of spatio-temporal bins : {dim_gt}\n" |
| 208 | + f"- Number of bins with an effect : {tp_gt.sum()}\n" |
| 209 | + f"- Number of bins without effect : {tn_gt.sum()}\n" |
| 210 | +) |
| 211 | + |
| 212 | +# bins with / without in the retrieved effect |
| 213 | +tp_pv, tn_pv = (df_pv.values == 1), (df_pv.values == 0) |
| 214 | +dim_pv = np.prod(df_pv.values.shape) |
| 215 | + |
| 216 | +print( |
| 217 | + "Bins detected as significant\n" |
| 218 | + "----------------------------\n" |
| 219 | + f"- Total number of spatio-temporal bins : {dim_pv}\n" |
| 220 | + f"- Number of bins with an effect : {tp_pv.sum()}\n" |
| 221 | + f"- Number of bins without effect : {tn_pv.sum()}\n" |
| 222 | +) |
| 223 | + |
| 224 | +# comparison between the ground-truth |
| 225 | +tp = np.logical_and(tp_pv, tp_gt).sum() |
| 226 | +tn = np.logical_and(tn_pv, tn_gt).sum() |
| 227 | +fp = np.logical_and(tp_pv, tn_gt).sum() |
| 228 | +fn = np.logical_and(tn_pv, tp_gt).sum() |
| 229 | + |
| 230 | +print( |
| 231 | + "Comparison between the ground-truth and the retrieved effect\n" |
| 232 | + "------------------------------------------------------------\n" |
| 233 | + f"- Number of true positive : {tp}\n" |
| 234 | + f"- Number of true negative : {tn}\n" |
| 235 | + f"- Number of false positive : {fp}\n" |
| 236 | + f"- Number of false negative : {fn}\n" |
| 237 | +) |
| 238 | + |
| 239 | +# Type I error rate (false positive) |
| 240 | +p_fp = fp / (fp + tn) # == fp / n_false |
| 241 | +# Type II error rate (false negative) |
| 242 | +p_fn = fn / (fn + tp) # == fn / n_true |
| 243 | +# Sensitivity (true positive rate) |
| 244 | +sen = tp / (tp + fn) # == 1. - p_fn == tp / n_true |
| 245 | +# Specificity (true negative rate) |
| 246 | +spe = tn / (tn + fp) # == 1. - p_fp == tn / n_false |
| 247 | +# Matthews Correlation Coefficient |
| 248 | +numer = np.array(tp * tn - fp * fn) |
| 249 | +denom = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) |
| 250 | +mcc = numer / denom |
| 251 | + |
| 252 | +print( |
| 253 | + f"Statistics\n" |
| 254 | + f"----------\n" |
| 255 | + f"- Type I error (false positive rate): {p_fp}\n" |
| 256 | + f"- Type II error (false negative rate): {p_fn}\n" |
| 257 | + f"- Sensitivity (true positive rate): {sen}\n" |
| 258 | + f"- Specificity (true negative rate): {spe}\n" |
| 259 | + f"- Matthews Correlation Coefficient: {mcc}" |
| 260 | +) |
0 commit comments