# Overview

For two weeks now, we have generated plots of brain data related to experimental events. Today we will revisit and expand upon those plots, and see what kind of inferences we can make about the brain. 

We will re-visit event-related averages in both fMRI and EEG data.

# Update neurods

In [None]:
# Only needs to be done once! Then you can skip this cell in future runs through notebook
# by setting the statement here to False
# (as written the condition is False, so nothing below is executed; change it to True
# for a single run to import neurods and call its updater)
if False:
    import neurods
    neurods.io.update_neurods()

In [None]:
# imports
import os
import mne # for EEG data
import neurods 
import cortex # for making brain plots
import numpy as np
from scipy.stats import zscore
import matplotlib.pyplot as plt
# New (to us) modules:
import glob # for listing files in directories
import json # for loading meta-data from structured text files

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
# New functions
#neurods.io.load_eeg_data?
neurods.io.load_fmri_data?

# fMRI event-related averages

In [None]:
# Specify data files
# Directory holding the category-experiment runs and their design file
categories_dir = os.path.join(neurods.io.data_list['fmri'], 'categories')
# One file per run (runs 1-3), as full paths
fmri_files1 = [
    os.path.join(categories_dir, 's01_categories_{:02d}.nii.gz'.format(run))
    for run in (1, 2, 3)
]
fmri_design_file = os.path.join(categories_dir, 'experiment_design.npz')
# Bare file name only (no directory is attached to it here)
fmri_files2 = ['s01_motorloc.nii.gz']


In [None]:
# Get data
# Subject and transform names handed to every pycortex call in this notebook
sub = 's01'
xfm = 'catloc'
# Mask of cortical voxels; it restricts which voxels are loaded and plotted
cortical_voxels = cortex.db.get_mask(sub, xfm, type='cortical')
# Load the first run only
fdata = neurods.io.load_fmri_data(
    fmri_files1[0],
    mask=cortical_voxels,
    do_zscore=True,
    dtype=np.float32,
)
# Get experiment design
design = np.load(fmri_design_file)
# The 'conditions' entry holds the condition names; convert it to a plain list
condition_names = design['conditions'].tolist()
print(condition_names)

In [None]:
# Count the voxels in the cortical mask
cortical_voxels.sum()

In [None]:
# examine the data
fdata.shape

In [None]:
# Shape of the run-1 design array; its columns are selected by condition number in the cells below
design['run1'].shape

In [None]:
# Event-related average for ONE condition, using the first run only
# Set up variable to store condition average(s)
cond_avg_data = dict()  # condition name -> mean response (event_duration x columns of fdata)
cond_avg_vols = dict()  # condition name -> pycortex Volume of that mean response
# Some housekeeping
event_duration = 10  # number of rows (time points) of fdata kept after each onset
cond_num = 0         # column of the design array to analyze
# Look the name up from the column index so the label cannot disagree with the data
cond_name = condition_names[cond_num]
# Get onset indices
cond_bool = design['run1'][:, cond_num]
onset_indices = neurods.utils.get_onsets(cond_bool)
print(onset_indices)
# Collect one window per event. An event too close to the end of the run would give a
# shorter slice, and windows of unequal length cannot be stacked into one array, so
# such events are left out of the average.
n_timepoints = fdata.shape[0]
tmp = [fdata[oi:oi+event_duration, :]
       for oi in onset_indices
       if oi + event_duration <= n_timepoints]
tmp = np.array(tmp)
print('tmp variable shape:')
print(tmp.shape)

# Average over the repeats of the event (first axis)
tmp = np.mean(tmp, axis=0)
# Create pycortex object for volume movie
cond_avg_data[cond_name] = tmp
cond_avg_vols[cond_name] = cortex.Volume(tmp, sub, xfm, vmin=-3, vmax=3, cmap='RdBu_r', mask=cortical_voxels)
print('mean response to {}: '.format(cond_name), tmp.shape)
del tmp

In [None]:
# Inspect the dictionary of condition name -> pycortex Volume filled in the previous cell
cond_avg_vols

In [None]:
# Hand the dictionary of condition-average volumes to pycortex's WebGL viewer
cortex.webgl.show(cond_avg_vols)

# Let's do this for all the conditions!

In [None]:
### STUDENT ANSWER
# Event-related averages for EVERY condition, using the first run only
# Set up variable to store condition average(s)
cond_avg_data = dict()  # condition name -> mean response (event_duration x columns of fdata)
cond_avg_vols = dict()  # condition name -> pycortex Volume of that mean response
# Some housekeeping
event_duration = 10  # number of rows (time points) of fdata kept after each onset
n_conditions = len(condition_names)
n_timepoints = fdata.shape[0]
# Column `cond_num` of the design array belongs to the name at the same list position
for cond_num, cond_name in enumerate(condition_names):
    # Get onset indices
    cond_bool = design['run1'][:, cond_num]
    onset_indices = neurods.utils.get_onsets(cond_bool)
    # Keep only events whose whole window lies inside the run; a window cut off by the
    # end of the data would be shorter than the others and could not be stacked.
    tmp = [fdata[oi:oi+event_duration, :]
           for oi in onset_indices
           if oi + event_duration <= n_timepoints]
    tmp = np.array(tmp)

    # Average over the repeats of the event (first axis)
    tmp = np.mean(tmp, axis=0)
    # Create pycortex object for volume movie
    cond_avg_data[cond_name] = tmp
    cond_avg_vols[cond_name] = cortex.Volume(tmp, sub, xfm, vmin=-3, vmax=3, cmap='RdBu_r', mask=cortical_voxels)
    print('mean response to {}: '.format(cond_name), tmp.shape)
    del tmp

In [None]:
# View the run-1 averages: the dictionary now holds one volume per condition
cortex.webgl.show(cond_avg_vols)

# How would we go about doing this for ALL the data?
Implement these averages with **ALL THE DATA** available (3 runs). 

In [None]:
# Reminder of the three run files built in the "Specify data files" cell
fmri_files1

In [None]:
# Scratch check: gather the design array of each of the three runs into a list
wtf = [design[run] for run in ['run1', 'run2', 'run3']]

In [None]:
# Shape of the third entry of the scratch list, i.e. the design array of 'run3'
wtf[2].shape

In [None]:
### STUDENT ANSWER
# Load all three runs in one call by unpacking the file list into positional arguments.
# NOTE(review): presumably load_fmri_data concatenates the runs along the time axis
# (and applies do_zscore per run) — confirm against its docstring.
data_full = neurods.io.load_fmri_data(*fmri_files1, mask=cortical_voxels,
                                     do_zscore=True, dtype=np.float32)
print("Full data shape: ", data_full.shape)
# Stack the per-run design arrays row-wise; the cells below use row indices of this
# array to index rows of data_full, so the two printed row counts should agree.
design_full = np.vstack([design[run] for run in ['run1', 'run2', 'run3']])
print("Full design shape: ", design_full.shape)

In [None]:
### STUDENT ANSWER
# Event-related averages for every condition, using all three runs
# Set up variable to store condition average(s)
cond_avg_data = dict()  # condition name -> mean response (event_duration x columns of data_full)
cond_avg_vols = dict()  # condition name -> pycortex Volume of that mean response
# Some housekeeping
event_duration = 10  # number of rows (time points) kept after each onset
n_conditions = len(condition_names)
# Row index (in design_full) at which each run ends. design_full is the row-wise stack
# of the three per-run design arrays, so the cumulative row counts are the boundaries.
run_ends = np.cumsum([design[run].shape[0] for run in ['run1', 'run2', 'run3']])
n_timepoints = data_full.shape[0]
for cond_num, cond_name in enumerate(condition_names):
    # Get onset indices
    cond_bool = design_full[:, cond_num]
    onset_indices = neurods.utils.get_onsets(cond_bool)
    print(onset_indices)
    tmp = []
    for oi in onset_indices:
        # End of the run this onset belongs to (an onset exactly on a boundary is the
        # first time point of the NEXT run, hence side='right')
        run_end = run_ends[np.searchsorted(run_ends, oi, side='right')]
        # Skip windows that would spill into the next run or past the end of the data:
        # they would mix time points from two separate recordings into one response.
        if oi + event_duration > min(run_end, n_timepoints):
            continue
        tmp.append(data_full[oi:oi+event_duration, :])
    tmp = np.array(tmp)
    print('Concatenated tmp variable shape:')
    print(tmp.shape)

    # Average over the repeats of the event (first axis)
    tmp = np.mean(tmp, axis=0)
    # Create pycortex object for volume movie
    cond_avg_data[cond_name] = tmp
    cond_avg_vols[cond_name] = cortex.Volume(tmp, sub, xfm, vmin=-3, vmax=3, cmap='RdBu_r', mask=cortical_voxels)
    print('mean response to {}: '.format(cond_name), tmp.shape)
    del tmp

In [None]:
# View the averages computed from all three runs
cortex.webgl.show(cond_avg_vols)

# Let's look at a mid-event response (the 5th TR)

In [None]:
# Show 5th TR for each
tr = 4  # zero-based row index, i.e. the 5th time point of each average
# Display settings shared by every volume
volume_kwargs = dict(vmin=-3, vmax=3, cmap='RdBu_r', mask=cortical_voxels)
tr5_volumes = dict()
for cond_name in condition_names:
    # One row of the condition average: the response at the chosen time point
    tr_response = cond_avg_data[cond_name][tr, :]
    tr5_volumes[cond_name] = cortex.Volume(tr_response, sub, xfm, **volume_kwargs)
cortex.webgl.show(tr5_volumes)

> Breakout session
* Are you happy with looking at the 5th TR? Can you think of a better way to compress the timecourse down to a single volume?
* What do these event-related averages tell you?
* What else do you want to know to help you interpret the data? (make a list!)

In [None]:
# More exploratory analyses!

# EEG event-related averages

In [None]:
neurods.io.load_eeg_data?

In [None]:
subject, session = 8, 1
# Fill in the subject/session numbers per path component, so that the data root
# itself is never passed through str.format (a root containing '{' or '}' would break it)
fdir = os.path.join(neurods.io.data_list['eeg'], 'epfl_p300',
                    'subject{}'.format(subject), 'session{}'.format(session))
# Load data from file
edatafile = os.path.join(fdir, 'eeg_run06-raw.fif')
times, edata = neurods.io.load_eeg_data(edatafile, return_mne=False)
# Load events from file
eeventfile = os.path.join(fdir, 'events_run06-eve.txt')
events = mne.read_events(eeventfile)

In [None]:
# Shape of the EEG array; it is sliced as edata[:, start:stop] below, so time is the last axis
edata.shape

In [None]:
# Shape of the events array: one row per event; the cells below read column 0 as the
# onset index and column 2 as the condition number
events.shape

In [None]:
# Relevant information
sfreq = 2048.  # samples per second; converts times in seconds to sample offsets
tmin = -0.1    # start of each epoch relative to event onset (s)
tmax = 0.6     # end of each epoch relative to event onset (s)
n_conditions = 6
cond_epochs = [[] for x in range(n_conditions)]
# The window offsets are the same for every event, so compute them once
offset_min = int(tmin*sfreq)
offset_max = int(tmax*sfreq)
n_samples = edata.shape[1]

for cond in range(n_conditions):
    # Columns in events are [onset_indices, <blank>, condition_number]
    # Select the events of this condition (condition numbers in the file start at 1)
    cond_idx = events[:, 2]==(cond+1)
    onsets = events[cond_idx, 0]
    for onset in onsets:
        # Find indices for start and stop of the epoch around this onset
        imin = onset + offset_min
        imax = onset + offset_max
        # Skip events whose window does not fit in the recording: a negative start
        # would wrap around to the end of the array and a stop past the end would give
        # a shorter slice, and either way the epochs could no longer be stacked.
        if imin < 0 or imax > n_samples:
            continue
        data_slice = edata[:, imin:imax]
        # Normalize each row of the epoch over time
        data_slice = zscore(data_slice, axis=1)
        cond_epochs[cond].append(data_slice)
    # Stack this condition's epochs into one array with epochs on the first axis
    cond_epochs[cond] = np.array(cond_epochs[cond])

In [None]:
# Shape of the stacked epochs for the first condition: epochs first, then the two axes
# of each edata slice (rows of edata, samples within the window)
cond_epochs[0].shape

In [None]:
### STUDENT ANSWER


In [None]:
# `tmp` was deleted at the end of the fMRI cells, so define it here explicitly:
# the stacked epochs of the first condition
tmp = cond_epochs[0]
print(tmp.shape)
# Averaging over the first axis (epochs) removes it
print(tmp.mean(0).shape)
# Averaging once more removes the next axis too, leaving one value per time sample
print(tmp.mean(0).mean(0).shape)

In [None]:
# Time axis of one epoch, from tmin to tmax seconds around the event onset
event_time = np.linspace(tmin, tmax, cond_epochs[0].shape[-1])
for cond in range(n_conditions):
    tmp = cond_epochs[cond]
    # Average over epochs, then over the remaining non-time axis
    event_electrode_avg = tmp.mean(0).mean(0)
    plt.plot(event_time, event_electrode_avg, label='cond %d'%(cond+1))
plt.xlabel('Time (s)')
plt.ylabel('Mean EEG activity (z-scored)')
_ = plt.legend()

In [None]:
### STUDENT ANSWER
# Same epoching as above, but delegated to neurods.utils.extract_epochs
# Relevant information
sfreq = 2048.  # samples per second
tmin = -0.1    # start of each epoch relative to event onset (s)
tmax = 0.6     # end of each epoch relative to event onset (s)
n_conditions = 6
cond_epochs = [[] for x in range(n_conditions)]

for cond in range(n_conditions):
    # Columns in events are [onset_indices, <blank>, condition_number]
    # Select the events of this condition (condition numbers in the file start at 1)
    cond_idx = events[:, 2]==(cond+1)
    # The last onset of each condition is dropped
    # (presumably because its window can run past the end of the recording — TODO confirm)
    onsets = events[cond_idx, 0][:-1]
    # Pass the constants defined above instead of re-typing them, and state the
    # sampling rate explicitly as the later target/not-target cell does
    cond_epochs[cond], etimes = neurods.utils.extract_epochs(
        edata, onsets, tmin=tmin, tmax=tmax, baseline_type='mean', sfreq=sfreq)

In [None]:
# Use the time axis returned by extract_epochs rather than rebuilding an approximate
# one with linspace (the target/not-target plot further down uses it the same way)
event_time = etimes
for cond in range(n_conditions):
    tmp = cond_epochs[cond]
    # Average over epochs, then over the remaining non-time axis
    event_electrode_avg = tmp.mean(0).mean(0)
    plt.plot(event_time, event_electrode_avg, label='cond %d'%(cond+1))
plt.xlabel('Time (s)')
plt.ylabel('Mean EEG activity')
_ = plt.legend()

# But! We know more about this data than just condition names!
One image per run was a *target* - the subjects had to count how many times it appeared. Did that affect the response to that image?

In [None]:
# Note: %load magic function just shows you the contents of a text file, by loading that
# file into the cell

In [None]:
# %load /data/shared/cogneuro88/eeg/epfl_p300/subject8/session1/metadata_run06.json
# The line below is the content of that file as placed into the cell by %load:
# the target condition number, and a mapping from image name to condition number
{"target": 5, "event_id": {"radio": 6, "tv": 1, "window": 5, "door": 4, "phone": 2, "light": 3}}

In [None]:
# Path of the run-06 meta-data file, which sits next to the EEG data in `fdir`.
# Built inline so that this cell also works on a fresh kernel, where the name
# `metafile` has not been assigned yet at this point of the notebook.
os.path.join(fdir, 'metadata_run06.json')

In [None]:
# Load meta-data about experiment
metafile = os.path.join(fdir, 'metadata_run06.json')
# Open the file in a `with` block so the handle is closed as soon as it has been parsed
with open(metafile) as fid:
    metadata = json.load(fid)
print(metadata)

Subjects were attending to - i.e., watching for - the appearance of condition 5 (which, as the other part of the meta-data tells us, was a picture of a window)

In [None]:
# Find out which image was the target
target = metadata['target']
# Relevant information
sfreq = 2048.  # samples per second
tmin = -0.1    # start of each epoch relative to event onset (s)
tmax = 0.6     # end of each epoch relative to event onset (s)
n_conditions = 6
# Epoch arrays are sorted into two lists: the target image vs. every other image.
# (The per-condition `cond_epochs` list is deliberately not reset here: this cell
# never fills it, and emptying it would break the earlier plotting cells on re-run.)
targets = []
not_targets = []
for cond in range(n_conditions):
    # Columns in events are [onset_indices, <blank>, condition_number]
    # Select the events of this condition (condition numbers in the file start at 1)
    cond_idx = events[:, 2]==(cond+1)
    # The last onset of each condition is dropped, as in the earlier epoching cell
    onsets = events[cond_idx, 0][:-1]
    # Use the constants defined above rather than repeating their values
    tmp, etimes = neurods.utils.extract_epochs(edata, onsets, tmin=tmin, tmax=tmax,
                                               baseline_type='mean', baseline_times=(None, 0),
                                               sfreq=sfreq)
    if (cond+1)==target:
        targets.append(tmp)
    else:
        not_targets.append(tmp)

Some checking up on the variables we have created:

In [None]:
# Number of epoch arrays filed as target (one is appended per condition equal to `target`)
len(targets)

In [None]:
# Number of epoch arrays filed as not-target (one per remaining condition)
len(not_targets)

In [None]:
# Shape of the first epoch array in the target list
print(targets[0].shape)

In [None]:
# Shape of the first epoch array in the not-target list, for comparison with the target one
not_targets[0].shape

The above outputs are sensible, because all elements of the `targets` and `not_targets` lists should be arrays of the same size - (repeats x electrodes x time)

In [None]:
# Concatenate images that were NOT a target, and images that were the target
# Each list holds one epoch array per image; stacking joins them along the first axis.
# NOTE: this replaces the lists by arrays under the same names, so run this cell only
# once after the cell that builds the lists — stacking an already-stacked array would
# presumably merge its first two axes (verify before re-running).
not_targets = np.vstack(not_targets)
targets = np.vstack(targets)

In [None]:
# There are 5 images that weren't the target, and only one image that was the target
print('Shape of `targets` array:', targets.shape)
print('Shape of `not_targets` array:', not_targets.shape)

In [None]:
# Plot both
# Each trace is averaged over the first axis (epochs) and then over the next one,
# leaving one value per time point in `etimes`
plt.plot(etimes, targets.mean(0).mean(0), color='r', lw=2, label='Targets')
plt.plot(etimes, not_targets.mean(0).mean(0), color='k', lw=2, label='Not targets')
# Label the axes like the per-condition plot above so the figure can be read on its own
plt.xlabel('Time (s)')
plt.ylabel('Mean EEG activity')
_ = plt.legend()

Note that the above plot is not really a fair comparison, because one condition (one image) was the target, and other images were the non-targets. What if the image in the target condition simply elicited a bigger response? Then we might mistake a large response to this specific image for a large response to a target of attention! (Maybe there is no effect of attention at all!) 
To test this possibility, we have to examine the response to EACH image as a target, and also as a "not target" (a distractor). That is what the cell below computes.

# Now we go nuts, and compute the same thing for many runs, with each image used in turn as a target

In [None]:
# Basic data paths
subject = 8 # 1-2 or 8 (9 total, not all downloaded...)
session = 1 # 1-4
base_path = os.path.join(neurods.io.data_list['eeg'], 'epfl_p300',
                         'subject{}'.format(subject), 'session{}'.format(session))
# Sorted listings of the session's data, event and meta-data files; the i-th entry of
# each list is treated as belonging to the same run
datafiles = sorted(glob.glob(os.path.join(base_path, '*fif')))
eventfiles = sorted(glob.glob(os.path.join(base_path, '*txt')))
metafiles = sorted(glob.glob(os.path.join(base_path, '*json')))
# zip() would silently stop at the shortest list and could pair files of different
# runs, so insist on one file of each kind per run
if not (len(datafiles) == len(eventfiles) == len(metafiles)):
    raise ValueError('Found {} data, {} event and {} meta-data files in {}; expected equal numbers'.format(
        len(datafiles), len(eventfiles), len(metafiles), base_path))

# Per-image lists of epoch arrays, one array appended per run
epochs_as_target = {}
epochs_not_target = {}

# Loop variables are named *_file so they do not overwrite `fdata` (the fMRI data array)
for data_file, event_file, meta_file in zip(datafiles, eventfiles, metafiles):
    dtimes, data = neurods.io.load_eeg_data(data_file, return_mne=False)
    events = mne.read_events(event_file)
    # Close the meta-data file as soon as it has been parsed
    with open(meta_file, mode='r') as fid:
        meta = json.load(fid)
    target = meta['target']
    print('Target is: ',target)
    for cond in sorted(meta['event_id'].values()):
        cond_idx = events[:,2]==cond
        onset_indices = events[cond_idx, 0]
        print('sum= {}, {} indices; first index is {}'.format(cond_idx.sum(), onset_indices[:-1].shape, onset_indices[0]))
        # The last onset of each condition is dropped, as in the single-run cells above.
        # NOTE(review): extract_epochs runs with its default window/baseline settings
        # here, unlike the single-run cell — confirm that this is intended.
        epochs_tmp, times = neurods.utils.extract_epochs(data, onset_indices[:-1], is_verbose=False)
        # File this run's epochs under the image number, as target or as distractor
        bucket = epochs_as_target if cond == target else epochs_not_target
        bucket.setdefault(cond, []).append(epochs_tmp)

# Join each image's per-run arrays along the first axis, once, after the loop
image_as_target1 = {cond: np.vstack(chunks) for cond, chunks in epochs_as_target.items()}
image_not_target1 = {cond: np.vstack(chunks) for cond, chunks in epochs_not_target.items()}

In [None]:
# Condition numbers that served as the target in at least one of the loaded runs
image_as_target1.keys()

In [None]:
# Shape of the stacked epochs recorded while condition 1 was the target
image_as_target1[1].shape

# Compute event-related averages for all images, for all runs

In [None]:
# One panel per image (conditions 1-6): response as target vs. as distractor
fig, axs = plt.subplots(3, 2, figsize=(8,8), sharey=True, sharex=True)
# Times marked in every panel
stim_onset = 0
p300 = 0.3
for ii, ax in enumerate(axs.flatten(), 1):
    # average over repeats, average over electrodes
    ax.plot(times, image_as_target1[ii].mean(0).mean(0), lw=2, color='red', label='Target')
    ax.plot(times, image_not_target1[ii].mean(0).mean(0), lw=2, color='gray', label='Distractor')
    # axvline always spans the full height of the panel. Drawing the markers between
    # the y-limits read at this moment would leave them too short, because the shared
    # y-axis keeps rescaling as the remaining panels are plotted.
    for marker_time in (stim_onset, p300):
        ax.axvline(marker_time, color='y', linestyle='--')
    ax.set_title('Image {}'.format(ii))
    ax.legend(frameon=False, ncol=2)
# Axes are shared, so label only the bottom row and the left column
for ax in axs[-1, :]:
    ax.set_xlabel('Time (s)')
for ax in axs[:, 0]:
    ax.set_ylabel('Mean EEG activity')

# Questions for the rest of the class:

* Are responses to different kinds of events (different conditions) reliably different? (in time, in space)
* How can we assess how reliable a difference between conditions is?
* 

In [None]:
### STUDENT ANSWER


# Sample test questions
What are the following two variables? Find out as much as you can about them!

This is a 2D array, and all you know about it is that it's called "events". What is this likely to be?

In [None]:
# File 1 ("events" variable)
d1_url = 'https://www.dropbox.com/s/w8pu4ph5qon3yhq/events.npz'
d1_fname = 'events.npz'
# Download into the current directory only if the file is not there yet
if not os.path.exists(d1_fname):
    neurods.io.download_file(d1_url, d1_fname, root_destination=os.path.abspath(os.curdir))
# Read the array inside a `with` block so the .npz archive is closed afterwards
with np.load(d1_fname) as npz:
    mdata1 = npz['events']
# WHO AM I??
mdata1.shape

In [None]:
### STUDENT ANSWER


This is obviously brain data of some kind - what is it? Which dimension is which? How can you find out?

In [None]:
import nibabel

In [None]:
# File 2 (mystery brain data)
d2_url = 'https://www.dropbox.com/s/o8e9ko9zik6upvn/mystery.nii.gz'
d2_fname = 'mystery.nii.gz'
# Download into the current directory only if the file is not there yet
if not os.path.exists(d2_fname):
    neurods.io.download_file(d2_url, d2_fname, root_destination=os.path.abspath(os.curdir))
niix = nibabel.load(d2_fname)
# WHO AM I??
# get_data() is deprecated (and removed in recent nibabel releases); reading the image's
# data object into an array is the replacement that returns the same values
mdata2 = np.asanyarray(niix.dataobj)

In [None]:
### STUDENT ANSWER
mdata2.shape

# Fix the following plots:

In [None]:
# Make this more informative
# (exercise: the plot is left bare on purpose)
# 100 x 10 array of draws from a standard normal distribution; no seed is set, so the
# figure differs from run to run
data = np.random.randn(100,10)
_ = plt.hist(data)

In [None]:
# Run this cell first. No fair changing this!
# 100 x 100 array of uniform random values in [0, 1) ...
rdata2 = np.random.rand(100,100)
# ... with a single element set far outside that range
rdata2[3,4] = 25

In [None]:
# Make this a more informative plot
# (exercise: the plot is left bare on purpose)
# Draws the array as an image, one unsmoothed block per element
plt.imshow(rdata2, interpolation='nearest')