This notebook uses our own EEG library to reproduce the N400 effect in the naturalistic dataset of Brennan et al. 2018.

In [24]:
from pathlib import Path

import mne
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

import sys

# Parent of the notebook's working directory. It is put on `sys.path` so that
# the project-local `mfn400` package can be imported by later cells.
basedir = Path("..").resolve()
# Guarded so that re-running this cell does not pile up duplicate path entries.
if str(basedir) not in sys.path:
    sys.path.append(str(basedir))

DATA_DIR = Path("/om/data/public/language-eeg/brennan2018-v2")

# Band-pass filter edges (low, high) in Hz.
FILTER_WINDOW = (0.2, 20)

# Windows handed to `to_erp` as (start, end) pairs.
# NOTE(review): presumably seconds relative to word onset, with `None` meaning
# "from the start of the epoch" — confirm against the adapter.
EPOCH_WINDOW = (-0.1, 0.924)
TEST_WINDOW = (0.3, 0.5)
BASELINE_WINDOW = (None, 0)

# TODO: What is right for this montage?
# N400_ELECTRODES = ["1", "14", "24", "25", "26", "29", "30", "31", "41", "42", "44", "45"]
# Picking some central anterior sensors here
N400_ELECTRODES = ["45", "34", "35", "1"]

In [20]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
from mfn400.adapters.brennan2018 import BrennanDatasetAdapter

In [22]:
# Build the dataset adapter rooted at DATA_DIR. Per the recorded output below,
# construction reads each subject's raw `.fif` recording up front.
data = BrennanDatasetAdapter(DATA_DIR)

loading subject data:   0%|          | 0/10 [00:00<?, ?it/s]

1
Opening raw data file /om/data/public/language-eeg/brennan2018-v2/eeg/S01/S01_alice-raw.fif...
    Range : 0 ... 366524 =      0.000 ...   733.048 secs
Ready.
Reading 0 ... 366524  =      0.000 ...   733.048 secs...
3
Opening raw data file /om/data/public/language-eeg/brennan2018-v2/eeg/S03/S03_alice-raw.fif...
    Range : 0 ... 367299 =      0.000 ...   734.598 secs
Ready.
Reading 0 ... 367299  =      0.000 ...   734.598 secs...
4
Opening raw data file /om/data/public/language-eeg/brennan2018-v2/eeg/S04/S04_alice-raw.fif...
    Range : 0 ... 368449 =      0.000 ...   736.898 secs
Ready.
Reading 0 ... 368449  =      0.000 ...   736.898 secs...
5
Opening raw data file /om/data/public/language-eeg/brennan2018-v2/eeg/S05/S05_alice-raw.fif...
    Range : 0 ... 372824 =      0.000 ...   745.648 secs
Ready.
Reading 0 ... 372824  =      0.000 ...   745.648 secs...
6
Opening raw data file /om/data/public/language-eeg/brennan2018-v2/eeg/S06/S06_alice-raw.fif...
    Range : 0 ... 367449 =     

In [25]:
# Build the per-word ERP table from the loaded recordings. The recorded log
# shows a 0.2-20 Hz band-pass (FILTER_WINDOW) and bad-channel interpolation.
# NOTE(review): apply_baseline=False presumably leaves the responses
# un-baselined, with the baseline values exposed separately in the
# `<channel>_baseline` columns — confirm in `to_erp`.
erp_df = data.to_erp(EPOCH_WINDOW, TEST_WINDOW, BASELINE_WINDOW, apply_baseline=False,
                     filter_window=FILTER_WINDOW)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.2 - 20 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.20
- Lower transition bandwidth: 0.20 Hz (-6 dB cutoff frequency: 0.10 Hz)
- Upper passband edge: 20.00 Hz
- Upper transition bandwidth: 5.00 Hz (-6 dB cutoff frequency: 22.50 Hz)
- Filter length: 8251 samples (16.502 sec)

Interpolating bad channels
    Automatic origin fit: head of radius 95.0 mm
Computing interpolation matrix from 54 sensor positions
Interpolating 7 sensors
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.2 - 20 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband

to_epochs:   0%|          | 0/10 [00:00<?, ?it/s]

preparing ERP df:   0%|          | 0/10 [00:00<?, ?it/s]

In [26]:
# Display the ERP table, indexed by (subject_idx, index); pandas truncates the view.
erp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,segment_idx,sentence_idx,word_idx,Word,onset,offset,Order,LogFreq,SndPower,Length,...,57_baseline,58_baseline,59_baseline,60_baseline,61_baseline,VEOG_baseline,Aux5_baseline,29_baseline,AUD,AUD_baseline
subject_idx,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,0,1,1,1,Alice,3.772000,4.334721,1,8.65,3.621500e-07,0.562721,...,-8.262043,-4.312627,-5.447569,0.101826,3.677714,2.225084e+06,58973.355255,-3.022705,,
1,1,1,1,2,was,4.288721,4.556543,2,14.56,3.843500e-09,0.267822,...,-7.171473,-9.616954,-13.450274,-14.379215,-14.460006,2.277766e+06,317729.892392,-3.275391,,
1,2,1,1,3,beginning,4.510543,5.028929,3,10.69,3.686500e-09,0.518386,...,-5.570477,-3.694655,-2.929495,3.821794,5.618611,2.278773e+06,264685.650813,-1.998649,,
1,3,1,1,4,to,4.982929,5.124925,4,16.35,3.969700e-09,0.141996,...,-5.131527,1.499140,7.119355,8.334045,6.973427,2.285470e+06,810552.359796,-2.327362,,
1,4,1,1,5,get,5.078925,5.388327,5,13.79,3.774700e-09,0.309402,...,-0.284778,3.150235,8.077227,11.580093,12.183773,2.282099e+06,503885.421098,-2.157671,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,2124,12,84,6,happens,730.972874,731.418969,2146,10.77,7.081200e-04,0.446095,...,-1.237009,-6.457704,-11.395609,-14.080873,-21.585598,7.868602e+05,,-0.151461,0.038277,66069.773672
13,2125,12,84,7,when,731.424445,731.637874,2147,13.76,3.221100e-03,0.213429,...,-1.418953,-0.308020,1.934029,0.073531,-8.090467,7.902282e+05,,1.646245,0.063910,40047.566133
13,2126,12,84,8,one,731.643350,731.805493,2148,14.17,1.984200e-03,0.162143,...,8.002053,14.061592,18.356973,22.400369,17.418582,7.906945e+05,,3.568817,0.018782,53636.747467
13,2127,12,84,9,eats,731.810969,732.068894,2149,8.15,2.473000e-05,0.257925,...,-1.660698,-1.152969,-3.064865,-3.338283,-5.959085,7.907120e+05,,0.140368,0.046487,58484.182657


In [27]:
# Average the per-word response over the electrodes configured in
# N400_ELECTRODES, keeping only those that exist as columns of `erp_df`.
#
# The selection is an ordered, de-duplicated *list*: pandas 2.x raises a
# TypeError when a set is passed to `.loc`, and a list keeps the column order
# (and therefore the result) deterministic across runs.
n400_electrodes = [el for el in dict.fromkeys(N400_ELECTRODES) if el in erp_df.columns]
if not n400_electrodes:
    # Averaging over zero columns would silently produce an all-NaN `n400`.
    raise ValueError(
        f"None of the configured N400 electrodes {N400_ELECTRODES} are columns of erp_df"
    )

# Re-key each row by (subject, sentence, word) and collapse the selected
# electrode columns into a single `n400` column.
our_erp_df = (
    erp_df.reset_index()
    .set_index(["subject_idx", "sentence_idx", "word_idx"])
    .loc[:, n400_electrodes]
    .mean(axis=1)
    .rename("n400")
    .to_frame()
)
our_erp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n400
subject_idx,sentence_idx,word_idx,Unnamed: 3_level_1
1,1,1,3.803698
1,1,2,-6.611370
1,1,3,4.727505
1,1,4,11.894655
1,1,5,7.846100
...,...,...,...
13,84,6,-1.646821
13,84,7,-6.349737
13,84,8,-1.737450
13,84,9,4.666635


In [28]:
# Same averaging as for `n400`, but over the matching `<electrode>_baseline`
# columns; the result is a Series named `our_baseline` keyed by
# (subject_idx, sentence_idx, word_idx).
baseline_columns = [f"{el}_baseline" for el in n400_electrodes]
our_baseline_df = (
    erp_df.reset_index()
    .set_index(["subject_idx", "sentence_idx", "word_idx"])
    [baseline_columns]
    .mean(axis=1)
    .rename("our_baseline")
)
our_baseline_df

subject_idx  sentence_idx  word_idx
1            1             1            1.324047
                           2           -5.790971
                           3            6.099852
                           4            5.143346
                           5            5.334951
                                         ...    
13           84            6           -6.698585
                           7           -3.065428
                           8           15.875136
                           9            0.682964
                           10          -3.311757
Name: our_baseline, Length: 21290, dtype: float64

In [29]:
# Attach the `n400` and `our_baseline` averages to the full ERP table.
# Both right-hand objects are indexed by the key columns below, so each merge
# matches the left key columns against the right index (default inner join).
word_key = ["subject_idx", "sentence_idx", "word_idx"]
merged_df = (
    erp_df.reset_index()
    .merge(our_erp_df, left_on=word_key, right_index=True)
    .merge(our_baseline_df, left_on=word_key, right_index=True)
)
merged_df

Unnamed: 0,subject_idx,index,segment_idx,sentence_idx,word_idx,Word,onset,offset,Order,LogFreq,...,59_baseline,60_baseline,61_baseline,VEOG_baseline,Aux5_baseline,29_baseline,AUD,AUD_baseline,n400,our_baseline
0,1,0,1,1,1,Alice,3.772000,4.334721,1,8.65,...,-5.447569,0.101826,3.677714,2.225084e+06,58973.355255,-3.022705,,,3.803698,1.324047
1,1,1,1,1,2,was,4.288721,4.556543,2,14.56,...,-13.450274,-14.379215,-14.460006,2.277766e+06,317729.892392,-3.275391,,,-6.611370,-5.790971
2,1,2,1,1,3,beginning,4.510543,5.028929,3,10.69,...,-2.929495,3.821794,5.618611,2.278773e+06,264685.650813,-1.998649,,,4.727505,6.099852
3,1,3,1,1,4,to,4.982929,5.124925,4,16.35,...,7.119355,8.334045,6.973427,2.285470e+06,810552.359796,-2.327362,,,11.894655,5.143346
4,1,4,1,1,5,get,5.078925,5.388327,5,13.79,...,8.077227,11.580093,12.183773,2.282099e+06,503885.421098,-2.157671,,,7.846100,5.334951
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21285,13,2124,12,84,6,happens,730.972874,731.418969,2146,10.77,...,-11.395609,-14.080873,-21.585598,7.868602e+05,,-0.151461,0.038277,66069.773672,-1.646821,-6.698585
21286,13,2125,12,84,7,when,731.424445,731.637874,2147,13.76,...,1.934029,0.073531,-8.090467,7.902282e+05,,1.646245,0.063910,40047.566133,-6.349737,-3.065428
21287,13,2126,12,84,8,one,731.643350,731.805493,2148,14.17,...,18.356973,22.400369,17.418582,7.906945e+05,,3.568817,0.018782,53636.747467,-1.737450,15.875136
21288,13,2127,12,84,9,eats,731.810969,732.068894,2149,8.15,...,-3.064865,-3.338283,-5.959085,7.907120e+05,,0.140368,0.046487,58484.182657,4.666635,0.682964


In [30]:
# Persist the merged table (ERP columns plus the `n400` and `our_baseline`
# averages) next to the notebook; the row index is written as the first column.
merged_df.to_csv("brennan_erp_n400.csv", index=True)

---

In [13]:
# Scratch check: the averaged baseline divided by 1000.
# NOTE(review): the recorded output for this cell (execution count 13, 8516
# rows) predates the current `merged_df`; re-run before relying on it.
merged_df["our_baseline"].div(1e3)

0      -2.645913
1      -2.654308
2      -2.653121
3      -2.657983
4      -2.662320
          ...   
8511    1.174708
8512    1.170416
8513    1.172266
8514    1.170332
8515    1.172322
Name: our_baseline, Length: 8516, dtype: float64