# GLM Fingertapping Example

In [None]:
import matplotlib.pyplot as p
import numpy as np
import pandas as pd
import xarray as xr

import cedalion
import cedalion.datasets
import cedalion.io
import cedalion.models.glm as glm
import cedalion.nirs
import cedalion.plots as plots
import cedalion.sigproc.frequency
from cedalion import units

# Keep xarray reprs compact: do not expand the data section when objects are displayed.
xr.set_options(display_expand_data=False);

## Loading and preprocessing the dataset

This notebook uses a finger-tapping dataset in BIDS layout provided by [Rob Luke](https://github.com/rob-luke/BIDS-NIRS-Tapping). It can be downloaded via `cedalion.datasets`.

In [None]:
# Load the finger-tapping recording through cedalion's dataset helper.
rec = cedalion.datasets.get_fingertapping()

# Rename the numeric trial codes to descriptive labels.
rec.stim.cd.rename_events(
    {
        "1.0": "control",
        "2.0": "Tapping/Left",
        "3.0": "Tapping/Right",
        "15.0": "sentinel",
    }
)
# Drop the "sentinel" events; only the three conditions above remain.
rec.stim = rec.stim[rec.stim.trial_type != "sentinel"]

# differential pathlength factors (one value per wavelength of rec["amp"])
dpf = xr.DataArray(
    [6, 6],
    dims="wavelength",
    coords={"wavelength": rec["amp"].wavelength},
)

# calculate optical density and concentrations
rec["od"] = cedalion.nirs.int2od(rec["amp"])
rec["conc"] = cedalion.nirs.od2conc(rec["od"], rec.geo3d, dpf, spectrum="prahl")

# Frequency filtering. Here we use only a highpass (fmax is set to 0) to remove
# slow drifts. Another possible option would be to use drift regressors in the
# design matrix.
# NOTE(review): fmax = 0 presumably disables the upper cut-off in freq_filter —
# confirm. The commented-out fmax = 0.3 Hz would presumably additionally remove
# the cardiac component (bandpass).
fmin = 0.02 * units.Hz
#fmax = 0.3 * units.Hz
fmax = 0 * units.Hz

rec["conc_filtered"] = cedalion.sigproc.frequency.freq_filter(rec["conc"], fmin, fmax)

display(rec)

Plot the frequency-filtered concentration data for two channels each on the left (S1D1, S1D3) and right (S5D5, S5D7) hemispheres.

In [None]:
ts = rec["conc_filtered"]

# One panel per channel: two channels on the left and two on the right hemisphere.
f, ax = p.subplots(4, 1, sharex=True, figsize=(12, 6))
for i, ch in enumerate(["S1D1", "S1D3", "S5D5", "S5D7"]):
    ax[i].plot(ts.time, ts.sel(channel=ch, chromo="HbO"), "r-", label="HbO")
    ax[i].plot(ts.time, ts.sel(channel=ch, chromo="HbR"), "b-", label="HbR")
    ax[i].set_title(f"Ch. {ch}")
    cedalion.plots.plot_stim_markers(ax[i], rec.stim, y=1)
    ax[i].set_ylabel(r"$\Delta$ c / uM")

ax[0].legend(ncol=6)
# The x-axis is shared, so only the bottom panel is labelled.
# set_xlabel is required here: set_label would only set the artist's legend
# label and leave the x-axis unlabelled.
ax[3].set_xlabel("time / s")
ax[3].set_xlim(0, 300)
p.tight_layout()

## Build design matrix
- use the functions in `glm.design_matrix` (e.g. `hrf_regressors`, `drift_regressors`) to build regressors
- to account for signal components from superficial layers use short-distance channel regression: for each long channel the closest short channel is selected. From these the channel-wise regressor 'short' is derived.

In [None]:
# split time series into two based on channel distance
ts_long, ts_short = cedalion.nirs.split_long_short_channels(
    rec["conc_filtered"], rec.geo3d, distance_threshold=1.5 * units.cm
)

# Combine the groups of regressors with `&`:
#   - HRF regressors for the stimulus events, using glm.Gamma as basis function
#   - drift regressors (drift_order=1)
#   - a channel-wise regressor derived from the closest short channel
# All regressors are built from ts_long, so this cell does not depend on the
# `ts` variable that is only defined in the plotting cell.
dms = (
    glm.design_matrix.hrf_regressors(
        ts_long, rec.stim, glm.Gamma(tau=0 * units.s, sigma=3 * units.s, T=3 * units.s)
    )
    & glm.design_matrix.drift_regressors(ts_long, drift_order=1)
    & glm.design_matrix.closest_short_channel_regressor(ts_long, ts_short, rec.geo3d)
)


The design matrix `dms.common` holds all regressors that apply to all channels. It has dimensions 'time', 'chromo' and 'regressor'. Regressors have string labels.

In [None]:
# Show the complete design matrix object, then only the regressors common to all channels.
display(dms)
display(dms.common)


`dms.channel_wise` is a list of additional xr.DataArrays that contain regressors which differ between channels.
Each such array may contain only one regressor (i.e. the size of the regressor dimension must be 1). The regressors for 
each channel are arranged in the additional 'channel' dimension.

In [None]:

# Show the first (index 0) channel-wise regressor array.
display(dms.channel_wise[0])



In [None]:
# Strip the pint units from the channel-wise regressor ...
dms.channel_wise[0] = dms.channel_wise[0].pint.dequantify()
# ... and scale it in place by its maximum along the "time" dimension.
dms.channel_wise[0] /= dms.channel_wise[0].max("time")

## Visualize the design matrix

In [None]:
dm = dms.common
display(dm)

# Image plot of the common HbO regressors for time < 600, via xr.DataArray.plot.
f, ax = p.subplots(1, 1, figsize=(12, 5))
before_600 = dm.time < 600
dm.sel(chromo="HbO", time=before_600).T.plot(ax=ax)
p.xticks(rotation=90)
p.show()

# Line plots of all regressors, one panel per chromophore.
f, ax = p.subplots(2, 1, sharex=True, figsize=(12, 5))

ch = "S5D5"

for panel, chromo in zip(ax, ["HbO", "HbR"]):
    # regressors shared by all channels
    for reg in dm.regressor.values:
        panel.plot(dm.time, dm.sel(chromo=chromo, regressor=reg), label=reg)

    # channel-wise regressors, shown for channel `ch` only
    for cwr in dms.channel_wise:
        for reg in cwr.regressor.values:
            cwr_trace = cwr.sel(chromo=chromo, regressor=reg, channel=ch)
            panel.plot(cwr.time, cwr_trace, label=reg)

    plots.plot_stim_markers(panel, rec.stim, y=1)
    panel.grid()
    panel.set_title(chromo)
    panel.set_ylim(-1.5, 1.5)

ax[0].legend(ncol=5)
ax[0].set_xlim(0, 240);

## Fitting the model

In [None]:
# Fit the GLM to the long channels with the design matrix `dms`.
# noise_model and max_jobs are passed to glm.fit as given.
results = glm.fit(ts_long, dms, noise_model="ar_irls", max_jobs=1)

display(results)

# Optional: tabular view of all betas without row truncation.
# NOTE(review): `betas` is only defined in a later cell.
#pd.set_option('display.max_rows', None)
#display(betas.rename("beta").to_dataframe())

In [None]:
# The fitted coefficients (betas) are the `params` exposed through the `.sm` accessor.
betas = results.sm.params
betas

In [None]:
# Tabular view of the coefficients as a pandas DataFrame with a column named "betas".
betas.rename("betas").to_dataframe()

In [None]:
# best fit parameters + confidence intervals
pd.concat([results[0,0].item().conf_int(), results[0,0].item().params.rename("beta")], axis=1)

## Model Predictions

- using `glm.predict` one can scale the regressors in the design matrix `dms` (common and channel-wise) with the estimated coefficients to obtain a model prediction
- by giving only a subset of betas to `glm.predict` one can predict subcomponents of the model

In [None]:
# prediction using all regressors
betas = results.sm.params
pred = glm.predict(ts_long, betas, dms)#, channel_wise_regressors)

# prediction of all nuisance regressors, i.e. all regressors that don't start with 'HRF '
pred_wo_hrf = glm.predict(
    ts_long,
    betas.sel(regressor=~betas.regressor.str.startswith("HRF ")),
    dms,
)

# prediction of all HRF regressors, i.e. all regressors that start with 'HRF '
pred_hrf = glm.predict(
    ts_long,
    betas.sel(regressor=betas.regressor.str.startswith("HRF ")),
    dms,
)

## Plot model predictions

In [None]:
# Channel to inspect (alternative: "S6D7").
ch = "S5D5"

# Figure 1: measured HbO, full model prediction and the prediction without HRF terms.
f, ax = p.subplots(1, 1, figsize=(12, 4))
hbo = dict(chromo="HbO", channel=ch)
ax.plot(ts_long.time, ts_long.sel(**hbo), "r-", label="data HbO", alpha=.5)
ax.plot(pred.time, pred.sel(**hbo), "r-", label="model", lw=2)
ax.plot(pred.time, pred_wo_hrf.sel(**hbo), "k:", label="model w/o HRF", alpha=.5)
plots.plot_stim_markers(ax, rec.stim, y=1)
ax.set_xlim(60, 300)
ax.set_ylim(-.4, .4)
ax.set_xlabel("time / s")
ax.set_ylabel(r"$\Delta$  c / uM")
ax.legend(ncol=4)


# Figure 2: data with the nuisance prediction subtracted, against the predicted
# HRF components. HRF curves are drawn first, then the residual data.
f, ax = p.subplots(1, 1, figsize=(12, 4))
chromo_formats = [("HbO", "r-"), ("HbR", "b-")]

for chromo, fmt in chromo_formats:
    ax.plot(
        pred_hrf.time,
        pred_hrf.sel(chromo=chromo, channel=ch),
        fmt,
        label=f"HRF {chromo}",
    )

for chromo, fmt in chromo_formats:
    data = ts_long.sel(chromo=chromo, channel=ch).pint.dequantify()
    nuisance = pred_wo_hrf.sel(chromo=chromo, channel=ch)
    ax.plot(
        pred_hrf.time,
        data - nuisance,
        fmt,
        label=f"data {chromo} - nuisance reg.",
        alpha=.5,
    )

plots.plot_stim_markers(ax, rec.stim, y=1)
ax.legend(ncol=4, loc="lower right")

ax.set_xlim(60, 500)
ax.set_xlabel("time / s")
ax.set_ylabel(r"$\Delta$  c / uM");


### Scalp plots

#### Betas

In [None]:
# Scalp maps of the HRF betas: one row per chromophore, one column per condition.
hrf_regressors = ["HRF Tapping/Left", "HRF Tapping/Right", "HRF control"]
# colour-scale limits per chromophore
vlims = {"HbO" : [0.,0.3], "HbR" : [-0.1, 0.05]}

f, ax = p.subplots(2, 3, figsize=(12, 8))
for i_chr, chromo in enumerate(betas.chromo.values):
    vmin, vmax = vlims[chromo]
    for i_reg, reg in enumerate(hrf_regressors):
        beta_map = betas.sel(chromo=chromo, regressor=reg)
        plots.scalp_plot(
            rec["amp"],
            rec.geo3d,
            beta_map,
            ax[i_chr, i_reg],
            min_dist=1.5 * units.cm,
            title=f"{chromo} {reg}",
            vmin=vmin,
            vmax=vmax,
            optode_labels=True,
            cmap="RdBu_r",
            cb_label=r"$\beta$",
        )
p.tight_layout()

#### T-Values

In [None]:
# Show the t-values and their overall range (useful for choosing colour limits).
tvalues = results.sm.tvalues
display(tvalues)
tvalues.min().item(), tvalues.max().item()

In [None]:
# Scalp maps of the t-values: one row per chromophore, one column per condition.
hrf_regressors = ["HRF Tapping/Left", "HRF Tapping/Right", "HRF control"]
# symmetric colour-scale limits for both chromophores
vlims = {"HbO" : [-20,20], "HbR" : [-20, 20]}

f, ax = p.subplots(2, 3, figsize=(12, 8))
for i_chr, chromo in enumerate(betas.chromo.values):
    vmin, vmax = vlims[chromo]
    for i_reg, reg in enumerate(hrf_regressors):
        t_map = results.sm.tvalues.sel(chromo=chromo, regressor=reg)
        plots.scalp_plot(
            rec["amp"],
            rec.geo3d,
            t_map,
            ax[i_chr, i_reg],
            min_dist=1.5 * units.cm,
            title=f"{chromo} {reg}",
            vmin=vmin,
            vmax=vmax,
            optode_labels=True,
            cmap="RdBu_r",
            cb_label=r"$t$",
        )
p.tight_layout()

## SM Functionality to Document

In [None]:
# Fitted parameters (the betas used throughout this notebook).
results.sm.params

In [None]:
# Confidence intervals of the fitted parameters.
# NOTE(review): alpha=0.05 presumably yields 95% intervals, as in statsmodels — confirm.
results.sm.conf_int(alpha=0.05)

In [None]:
# Covariance matrices of the fitted parameters.
results.sm.cov_params()

In [None]:
# Show the covariance matrix at index [0, 0] of the two leading dimensions as an image.
p.imshow(results.sm.cov_params()[0,0,:,:]);

In [None]:
# convenience function to access the diagonal elements of the cov matrices
results.sm.regressor_variances()

### Statistical Tests
[t-test statsmodels docs](https://www.statsmodels.org/dev/generated/statsmodels.regression.linear_model.RegressionResults.t_test.html#statsmodels.regression.linear_model.RegressionResults.t_test)


In [None]:
# t-values of the fitted parameters.
results.sm.tvalues

Specifying contrasts through strings
results in an array of [ContrastResults objects](https://www.statsmodels.org/dev/dev/generated/statsmodels.stats.contrast.ContrastResults.html#statsmodels.stats.contrast.ContrastResults).

In [None]:
# Two comma-separated hypotheses: each tapping condition is compared against
# the control condition.
hypotheses = "HRF Tapping/Left = HRF control, HRF Tapping/Right = HRF control"
results.sm.t_test(hypotheses)

Extract t-values and p-values with `map`.

**FIXME**: add convenience functions

In [None]:
# Read the `tvalue` attribute of each test result; the new dimension is named "hypothesis".
# NOTE(review): presumably `.sm.map` applies the function element-wise — confirm.
display(results.sm.t_test(hypotheses).sm.map(lambda i : i.tvalue, name="hypothesis"))

In [None]:
# Read the `pvalue` attribute of each test result; the new dimension is named "hypothesis".
# NOTE(review): presumably `.sm.map` applies the function element-wise — confirm.
display(results.sm.t_test(hypotheses).sm.map(lambda i : i.pvalue, name="hypothesis"))

### Plotting Uncertainty Bands

In [None]:
# Fitted coefficients and their covariance matrices; both are used below to
# sample sets of coefficients.
betas = results.sm.params
cov = results.sm.cov_params()

In [None]:
# Draw sets of coefficients from a multivariate normal distribution whose mean
# is the fitted betas and whose covariance is the fitted covariance matrix.
# This is done separately for every index pair of the two leading dimensions
# of `betas` (presumably channel and chromo — confirm).
N_SAMPLES = 100
# Seeded generator: the sampled betas, and every plot derived from them, are
# identical on each Restart & Run All.
rng = np.random.default_rng(42)

# Same layout as `betas`, with a trailing "sample" dimension to hold the draws.
sampled_betas = xr.zeros_like(betas).expand_dims({"sample" : N_SAMPLES}, axis=-1).copy()
for i_ch in range(sampled_betas.shape[0]):
    for i_cr in range(sampled_betas.shape[1]):
        # multivariate_normal returns (sample, regressor); transpose to match
        # the (regressor, sample) layout of the target slice.
        sampled_betas[i_ch, i_cr, :, :] = rng.multivariate_normal(
            betas[i_ch, i_cr, :].values,
            cov[i_ch, i_cr, :, :].values,
            size=N_SAMPLES,
        ).T

In [None]:
# Inspect the sampled coefficients (note the additional "sample" dimension).
sampled_betas

In [None]:
# Model prediction for the sampled coefficients.
# NOTE: this overwrites `pred` from the earlier "Model Predictions" cell.
pred = glm.predict(ts_long, sampled_betas, dms)

In [None]:
# Inspect the prediction computed from the sampled coefficients.
pred

In [None]:
# Mean and standard deviation of the prediction across the "sample" dimension.
pred_mean = pred.mean("sample")
pred_std = pred.std("sample")

In [None]:
# Select by dimension name rather than by position, so the selection does not
# depend on the dimension order of the prediction.
selection = dict(time=slice(60, 80), channel="S5D5", chromo="HbO")
mm = pred_mean.sel(**selection)
ss = pred_std.sel(**selection)

# Mean prediction with a band of +/- 3 standard deviations around it.
p.plot(mm.time, mm, c="r")
p.fill_between(mm.time, mm-3*ss, mm+3*ss, fc="y", alpha=.8)

p.xlabel("time / s")
p.ylabel(r"$\Delta$  c / uM");