In [1]:
import json
from pathlib import Path

from bids.layout import BIDSLayout
from bids.modeling import BIDSStatsModelsGraph
from nilearn.plotting import plot_design_matrix

In [2]:
# Root directory of the BIDS dataset.
root = 'data/ds002741'
# Location of the layout's index database. Derived from `root` so the cache
# path cannot drift away from the dataset it indexes when `root` is changed.
db_path = root + '/dbcache'
# Forwarded to BIDSLayout(reset_database=...); True presumably forces the
# index at `db_path` to be rebuilt on every run -- confirm against pybids docs.
reset_database = True
# JSON model specification that is loaded and handed to BIDSStatsModelsGraph.
spec_path = 'model_specs/ds002741_spec.json'

In [3]:
# Index the dataset at `root`, backing the index with the database at `db_path`.
layout = BIDSLayout(
    root=root,
    database_path=db_path,
    reset_database=reset_database,
)

In [4]:
# Parse the model specification into Python objects. JSON is UTF-8 by
# specification, so decode explicitly instead of relying on the platform's
# default text encoding.
spec = json.loads(Path(spec_path).read_text(encoding='utf-8'))

In [5]:
# Build the stats-model graph from the indexed dataset and the parsed spec.
graph = BIDSStatsModelsGraph(layout, spec)
# Load the variable collections that the graph's nodes will operate on.
# NOTE(review): pybids appears to document `scan_length` as a run duration in
# seconds, yet the events table shown later in this notebook has onsets past
# 530 s. 292 may be a volume count rather than seconds -- confirm.
graph.load_collections(scan_length=292)

### Original `BIDSVariableCollection`

We can take a look at the original variables available for a single subject, prior to running the node (i.e. applying any transformations)

Note: my model spec doesn't query the derivatives at all, which I think is why they don't show up as candidate regressors (I have not confirmed this).


In [6]:
# Root node of the model graph; this is the node that gets run further down.
root_node = graph.root_node
# Variable collections available to that node.
colls = root_node.get_collections()
# Keep the first collection for inspection (the surrounding prose treats it as
# a single subject's data).
first_sub = colls[0]

# These are the variables the transformations have to work with

In [7]:
# Show the collection as a DataFrame; entities=False presumably leaves out the
# BIDS entity columns -- confirm.
first_sub.to_df(entities=False)

  df = df.reset_index().replace("n/a", fillna)


Unnamed: 0,onset,duration,button_pushed,caric_cond,expression,identity,stim_name
0,7.285,1.960,,1,2,3,F036_disgust_anticars0001-0057_half.avi
1,9.746,1.897,,4,2,3,F036_disgust_cars0001-0057_half.avi
2,12.156,2.012,,0,2,3,
3,14.711,1.908,,3,2,3,F036_disgust_antimcars0001-0057_half.avi
4,17.112,2.014,28.0,5,2,3,TARGET
...,...,...,...,...,...,...,...
211,528.495,2.009,,0,1,2,
212,531.077,1.907,,4,1,2,F028_angry_cars0001-0057_half.avi
213,533.504,2.009,28.0,5,1,2,TARGET
214,536.072,1.897,,1,1,2,F028_angry_anticars0001-0057_half.avi


### Variables that can be used as Input for the first transformation

In [8]:
# Name -> variable mapping for this collection; in the recorded output every
# entry is a sparse run variable sourced from the events file.
first_sub.variables

{'button_pushed': <SparseRunVariable(name='button_pushed', source='events')>,
 'identity': <SparseRunVariable(name='identity', source='events')>,
 'caric_cond': <SparseRunVariable(name='caric_cond', source='events')>,
 'expression': <SparseRunVariable(name='expression', source='events')>,
 'stim_name': <SparseRunVariable(name='stim_name', source='events')>}

### There are currently no dense variables (defined for each TR).  I assume the derivatives would show up here.

In [9]:
# Dense variables held by the collection; the recorded output is an empty list.
first_sub.get_dense_variables()

[]

### Running the node (and applying transformations)

These are the `Transformations` that will be applied:

In [10]:
# Transformation block declared on the first node of the loaded model spec.
graph.model['nodes'][0]['transformations']

{'transformer': 'pybids-transforms-v1',
 'instructions': [{'name': 'Factor', 'input': ['identity']},
  {'name': 'Convolve', 'input': ['identity.*'], 'model': 'spm'}]}

In [11]:
# Run the root node with its own grouping, asking for the transformation
# history to be recorded (transformation_history=True). force_dense=False
# presumably leaves sparse variables sparse unless a transformation densifies
# them -- confirm.
# NOTE(review): the saved output of this cell is a ValueError and every later
# cell is unexecuted, so the notebook does not currently run end to end. The
# cause is not visible from this file and needs debugging.
outputs = root_node.run(
    group_by=root_node.group_by, force_dense=False, transformation_history=True
)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

We get a `BIDSStatsModelsNodeOutput` for every run/subject
(I only included 1 run and 1 subject)

In [None]:
# Display everything returned by root_node.run().
outputs

In [None]:
# Take the first entry of `outputs` for closer inspection and display it.
first_output = outputs[0]
first_output

In [None]:
# The output's X attribute -- the object handed to plot_design_matrix in the
# following cell.
first_output.X

In [None]:
# Draw the design matrix. The trailing semicolon keeps the repr of the value
# returned by plot_design_matrix out of the cell output, so only the figure
# is shown.
plot_design_matrix(first_output.X);

## Transformation history
You can break it down by each transformation too!

In [None]:
# Transformation history attached to the output (requested through
# transformation_history=True when the node was run); display it in full.
trans_hist = first_output.trans_hist
trans_hist

In [None]:
# First history entry: print the entry itself ...
print(trans_hist[0])

# ... and then the collection stored as its output.
print(trans_hist[0].output)

### Important note
The following will output both sparse and dense variables.  In this case there are only sparse variables.  As you'll see below (after convolution), the sparse variables will be made dense (so they can all be present in the same dataframe), and that can be a bit confusing (IMHO).

In [None]:
# Output collection of history entry 0 as a DataFrame (sparse and dense
# variables together).
trans_hist[0].output.to_df(entities=False)

### What did the first transformation (Factor) do?

Recall that, by default, the `duration` and `onset` columns will be used for the durations/onsets, so the values in, for example, an `identity.<level>` column (the columns matched by `identity.*` in the Convolve step) are essentially modulations if you choose to convolve (where 0 represents nothing happening).

In [None]:
# Output collection of history entry 1 as a DataFrame; per the heading above
# this is the state after the Factor transformation.
trans_hist[1].output.to_df(entities=False)

### Second transformation (Convolve)
Now things will be dense and I find the data frame to be a bit confusing for things that have not been convolved yet.  Code to display dense only or sparse only is below.

Note: the time (essentially the index) has a finer resolution than the TR, but, judging from X above, it must be downsampled to the TR when the design matrix is built (just a guess).


In [None]:
# Output collection of history entry 2 as a DataFrame; per the heading above
# this is the state after the Convolve transformation.
trans_hist[2].output.to_df(entities=False)

In [None]:
# Dense variables only: include_sparse=False leaves the sparse variables out
# of the resulting DataFrame.
trans_hist[2].output.to_df(entities=False, include_sparse=False)

In [None]:
# Sparse variables only, kept in their sparse form: include_dense=False leaves
# the dense variables out of the resulting DataFrame.
trans_hist[2].output.to_df(entities=False, include_dense=False)