This notebook uses our own EEG library to reproduce the N400 effect in the naturalistic dataset of Brennan et al. 2018.

In [1]:
from pathlib import Path

import mne
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# Put the parent of the working directory on sys.path before the project
# import below (presumably this is the repository root holding `mfn400` —
# TODO confirm).
basedir = Path("..").resolve()
import sys
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if str(basedir) not in sys.path:
    sys.path.append(str(basedir))

# Root directory of the Brennan et al. 2018 dataset; handed to the dataset adapter.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
DATA_DIR = Path("/om/data/public/language-eeg/brennan2018-v2")

# Windows passed positionally to `to_erp` as (start, end) pairs.
# Presumably seconds relative to word onset — TODO confirm against the adapter.
EPOCH_WINDOW = (-0.1, 0.924)
TEST_WINDOW = (0.3, 0.5)
BASELINE_WINDOW = (None, 0)

# Channel names (as strings) that are averaged to form the N400 measure.
N400_ELECTRODES = ["1", "14", "24", "25", "26", "29", "30", "31", "41", "42", "44", "45"]

In [2]:
# Reload edited project modules automatically so changes are picked up live.
%load_ext autoreload
%autoreload 2

In [3]:
from mfn400.adapters.brennan2018 import BrennanDatasetAdapter

In [13]:
# Build the dataset adapter from the dataset root directory.
# NOTE(review): the saved output of this cell ends in
# "TypeError: cannot unpack non-iterable NoneType object" and its execution
# count (13) is out of order with the neighbouring cells. Re-run from a fresh
# kernel and confirm the adapter constructs cleanly before trusting the
# results of the cells that follow.
data = BrennanDatasetAdapter(DATA_DIR)

1
Opening raw data file /om/data/public/language-eeg/brennan2018-v2/eeg/S01/S01_alice-raw.fif...
    Range : 0 ... 366524 =      0.000 ...   733.048 secs
Ready.
Reading 0 ... 366524  =      0.000 ...   733.048 secs...
                  onset  duration orig_time
segment_idx                                
1              3.726000  0.100000      None
2             61.344158  0.100002      None
3            122.241280  0.099998      None
4            185.553619  0.099991      None
5            255.596848  0.100006      None
6            321.924744  0.100006      None
7            385.753265  0.100006      None
8            448.712006  0.100006      None
9            506.075989  0.100006      None
10           563.354004  0.100037      None
11           624.665527  0.099976      None
12           680.876526  0.099976      None


TypeError: cannot unpack non-iterable NoneType object

In [5]:
# Extract per-word ERP values from the adapter.
# Baseline correction is switched off here; the returned frame still carries
# per-electrode `<electrode>_baseline` columns (visible in the displayed
# output), which are averaged separately further down.
erp_df = data.to_erp(
    EPOCH_WINDOW,
    TEST_WINDOW,
    BASELINE_WINDOW,
    apply_baseline=False,
)

EEG channel type selected for re-referencing
Applying a custom ('EEG',) reference.




In [6]:
# Display the ERP frame; the rendered output is truncated (elided rows and
# columns appear as "...").
erp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,sentence_idx,word_idx,Word,onset,offset,Order,LogFreq,SndPower,Length,IsLexical,...,51_baseline,52_baseline,53_baseline,54_baseline,55_baseline,56_baseline,57_baseline,58_baseline,59_baseline,61_baseline
subject_idx,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,0,1,2,was,0.562721,0.830543,2,14.56,3.843500e-09,0.267822,0.0,...,1.897620e+09,4.290908e+09,4.756621e+09,4.446411e+09,3.089086e+09,3.558622e+09,1.835225e+09,2.403503e+09,1.022068e+08,-3.428630e+09
1,1,1,3,beginning,0.784543,1.302929,3,10.69,3.686500e-09,0.518386,1.0,...,1.912594e+09,4.300643e+09,4.763610e+09,4.449984e+09,3.091016e+09,3.557884e+09,1.829306e+09,2.396451e+09,8.955557e+07,-3.458419e+09
1,2,1,4,to,1.256929,1.398925,4,16.35,3.969700e-09,0.141996,0.0,...,1.893896e+09,4.295244e+09,4.760382e+09,4.449672e+09,3.094381e+09,3.566020e+09,1.839872e+09,2.416793e+09,1.252920e+08,-3.375843e+09
1,3,1,5,get,1.352925,1.662327,5,13.79,3.774700e-09,0.309402,0.0,...,1.898223e+09,4.300803e+09,4.764460e+09,4.451666e+09,3.094388e+09,3.563430e+09,1.835919e+09,2.415982e+09,1.228439e+08,-3.375182e+09
1,4,1,6,very,1.616327,2.356749,6,13.28,4.072700e-09,0.740422,1.0,...,1.892716e+09,4.293851e+09,4.759716e+09,4.448964e+09,3.092244e+09,3.564988e+09,1.838467e+09,2.421501e+09,1.311945e+08,-3.344629e+09
1,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,2123,84,6,happens,716.435134,716.881229,2146,10.77,7.081200e-04,0.446095,1.0,...,1.981219e+09,4.547551e+09,5.339815e+09,4.814450e+09,3.615564e+09,3.945354e+09,2.079123e+09,3.563237e+09,-4.354617e+08,-4.368689e+09
1,2124,84,7,when,716.886705,717.100134,2147,13.76,3.221100e-03,0.213429,0.0,...,2.005534e+09,4.559524e+09,5.344749e+09,4.813101e+09,3.615751e+09,3.944426e+09,2.079532e+09,3.556316e+09,-4.494533e+08,-4.373702e+09
1,2125,84,8,one,717.105610,717.267753,2148,14.17,1.984200e-03,0.162143,1.0,...,2.039241e+09,4.583705e+09,5.365800e+09,4.831578e+09,3.631448e+09,3.955403e+09,2.090661e+09,3.570949e+09,-4.201038e+08,-4.066513e+09
1,2126,84,9,eats,717.273229,717.531154,2149,8.15,2.473000e-05,0.257925,1.0,...,2.040008e+09,4.586500e+09,5.369404e+09,4.834983e+09,3.631816e+09,3.953651e+09,2.089788e+09,3.566811e+09,-4.371132e+08,-4.214680e+09


In [7]:
# Average over N400 electrodes as given in paper.
# Keep only the electrodes that are actually present as columns, as an ordered
# list: recent pandas versions reject a set as a `.loc` indexer, and set
# iteration order is arbitrary.
n400_electrodes = [el for el in N400_ELECTRODES if el in erp_df.columns]
assert n400_electrodes, "none of N400_ELECTRODES are present in erp_df"

# One row per (subject, sentence, word); the single "n400" column is the mean
# across the selected electrodes. `reset_index` already returns a new frame,
# so `erp_df` itself is left untouched.
our_erp_df = (
    erp_df.reset_index()
    .set_index(["subject_idx", "sentence_idx", "word_idx"])
    .loc[:, n400_electrodes]
    .mean(axis=1)
    .rename("n400")
    .to_frame()
)
our_erp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n400
subject_idx,sentence_idx,word_idx,Unnamed: 3_level_1
1,1,2,9.498928e+08
1,1,3,9.523749e+08
1,1,4,9.720641e+08
1,1,5,9.728836e+08
1,1,6,9.690740e+08
1,...,...,...
1,84,6,1.154078e+09
1,84,7,1.163581e+09
1,84,8,1.148368e+09
1,84,9,1.149651e+09


In [9]:
# Mean of the `<electrode>_baseline` columns over the same N400 electrodes,
# keyed by (subject, sentence, word). Note that despite the `_df` suffix the
# result is a Series named "our_baseline".
baseline_columns = [f"{el}_baseline" for el in n400_electrodes]
our_baseline_df = (
    erp_df.reset_index()
    .set_index(["subject_idx", "sentence_idx", "word_idx"])
    .loc[:, baseline_columns]
    .mean(axis=1)
    .rename("our_baseline")
)
our_baseline_df

subject_idx  sentence_idx  word_idx
1            1             2           9.463307e+08
                           3           9.492573e+08
                           4           9.525666e+08
                           5           9.555627e+08
                           6           9.580937e+08
                                           ...     
             84            6           1.141134e+09
                           7           1.151971e+09
                           8           1.187167e+09
                           9           1.178665e+09
                           10          1.144037e+09
Name: our_baseline, Length: 2128, dtype: float64

In [12]:
# Attach the electrode-averaged "n400" and "our_baseline" values to the full
# per-word frame, matching on the (subject, sentence, word) key columns on the
# left and on the index of each averaged object on the right.
merge_keys = ["subject_idx", "sentence_idx", "word_idx"]
merged_df = (
    erp_df.reset_index()
    .merge(our_erp_df, left_on=merge_keys, right_index=True)
    .merge(our_baseline_df, left_on=merge_keys, right_index=True)
)
merged_df

Unnamed: 0,subject_idx,index,sentence_idx,word_idx,Word,onset,offset,Order,LogFreq,SndPower,...,53_baseline,54_baseline,55_baseline,56_baseline,57_baseline,58_baseline,59_baseline,61_baseline,n400,our_baseline
0,1,0,1,2,was,0.562721,0.830543,2,14.56,3.843500e-09,...,4.756621e+09,4.446411e+09,3.089086e+09,3.558622e+09,1.835225e+09,2.403503e+09,1.022068e+08,-3.428630e+09,9.498928e+08,9.463307e+08
1,1,1,1,3,beginning,0.784543,1.302929,3,10.69,3.686500e-09,...,4.763610e+09,4.449984e+09,3.091016e+09,3.557884e+09,1.829306e+09,2.396451e+09,8.955557e+07,-3.458419e+09,9.523749e+08,9.492573e+08
2,1,2,1,4,to,1.256929,1.398925,4,16.35,3.969700e-09,...,4.760382e+09,4.449672e+09,3.094381e+09,3.566020e+09,1.839872e+09,2.416793e+09,1.252920e+08,-3.375843e+09,9.720641e+08,9.525666e+08
3,1,3,1,5,get,1.352925,1.662327,5,13.79,3.774700e-09,...,4.764460e+09,4.451666e+09,3.094388e+09,3.563430e+09,1.835919e+09,2.415982e+09,1.228439e+08,-3.375182e+09,9.728836e+08,9.555627e+08
4,1,4,1,6,very,1.616327,2.356749,6,13.28,4.072700e-09,...,4.759716e+09,4.448964e+09,3.092244e+09,3.564988e+09,1.838467e+09,2.421501e+09,1.311945e+08,-3.344629e+09,9.690740e+08,9.580937e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2123,1,2123,84,6,happens,716.435134,716.881229,2146,10.77,7.081200e-04,...,5.339815e+09,4.814450e+09,3.615564e+09,3.945354e+09,2.079123e+09,3.563237e+09,-4.354617e+08,-4.368689e+09,1.154078e+09,1.141134e+09
2124,1,2124,84,7,when,716.886705,717.100134,2147,13.76,3.221100e-03,...,5.344749e+09,4.813101e+09,3.615751e+09,3.944426e+09,2.079532e+09,3.556316e+09,-4.494533e+08,-4.373702e+09,1.163581e+09,1.151971e+09
2125,1,2125,84,8,one,717.105610,717.267753,2148,14.17,1.984200e-03,...,5.365800e+09,4.831578e+09,3.631448e+09,3.955403e+09,2.090661e+09,3.570949e+09,-4.201038e+08,-4.066513e+09,1.148368e+09,1.187167e+09
2126,1,2126,84,9,eats,717.273229,717.531154,2149,8.15,2.473000e-05,...,5.369404e+09,4.834983e+09,3.631816e+09,3.953651e+09,2.089788e+09,3.566811e+09,-4.371132e+08,-4.214680e+09,1.149651e+09,1.178665e+09


In [14]:
# Persist the merged frame as CSV.
# NOTE(review): the path is relative, so the destination depends on the
# directory the kernel was started in.
merged_df.to_csv("brennan_erp_n400.csv")

---

In [17]:
# Rescale the averaged baseline by 1e6 for easier reading.
# NOTE(review): the recorded values are on the order of 1e9 before scaling —
# confirm the units/scaling coming out of the adapter.
merged_df["our_baseline"].div(1e6)

0        946.330657
1        949.257259
2        952.566615
3        955.562683
4        958.093661
           ...     
2123    1141.134238
2124    1151.970997
2125    1187.166501
2126    1178.665301
2127    1144.037437
Name: our_baseline, Length: 2128, dtype: float64