This notebook uses our own EEG library to reproduce the N400 effect in the naturalistic dataset of Brennan et al. 2018.

In [1]:
import logging
# Set the root logger level to DEBUG. The explicitly empty `handlers` list
# means basicConfig attaches no handler of its own to the root logger.
logging.basicConfig(level=logging.DEBUG, handlers=[])
from pathlib import Path

import mne
# Restrict MNE's own log output to warnings and above.
mne.set_log_level(logging.WARNING)
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# Absolute path of the parent of the current working directory; it is put on
# the import path (presumably so the local `mfn400` package can be imported).
basedir = Path("..").resolve()
import sys
# Guard the append so that re-running this cell does not accumulate duplicate
# entries in sys.path.
if str(basedir) not in sys.path:
    sys.path.append(str(basedir))

# Root directory of the dataset handed to the adapter. Hard-coded absolute
# path: adjust it when running on a different machine.
DATA_DIR = Path("/om/data/public/language-eeg/brennan2018-v2")

# Passed to `to_erp` as `filter_window`; presumably band-pass edges in Hz —
# TODO confirm against the adapter.
FILTER_WINDOW = (0.5, 20)

# Epoch, test and baseline windows passed to `to_erp`; presumably seconds
# relative to word onset — TODO confirm against the adapter.
EPOCH_WINDOW = (-0.1, 0.924)
TEST_WINDOW = (0.3, 0.5)
BASELINE_WINDOW = (None, 0)

# TODO: What is right for this montage?
# Earlier candidate electrode set, kept for reference:
# N400_ELECTRODES = ["1", "14", "24", "25", "26", "29", "30", "31", "41", "42", "44", "45"]
# Picking some central sensors here, following Gillis
N400_ELECTRODES = ["1", "4", "5", "6", "15", "14", "13"]

In [2]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from mfn400.adapters.brennan2018 import BrennanDatasetAdapter

In [4]:
# Instantiate the dataset adapter on DATA_DIR. Judging by the logged output,
# construction loads the data of every subject, so this cell takes a while.
data = BrennanDatasetAdapter(DATA_DIR)

loading subject data:   0%|          | 0/33 [00:00<?, ?it/s]

2022-06-17 12:50:30 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 1
2022-06-17 12:50:30 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 1
2022-06-17 12:50:33 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 3
2022-06-17 12:50:33 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 3
2022-06-17 12:50:35 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 4
2022-06-17 12:50:35 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 4
2022-06-17 12:50:37 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 5
2022-06-17 12:50:37 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 5
2022-06-17 12:50:40 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 6
2022-06-17 12:50:40 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 6
2022-06-17 12:50:42 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 8
2022-06-17

In [5]:
# Build the per-word ERP table from the configured epoch, test and baseline
# windows. `apply_baseline=False` is passed, and the resulting frame carries
# separate `<channel>_baseline` columns next to the channel columns.
erp_df = data.to_erp(
    EPOCH_WINDOW,
    TEST_WINDOW,
    BASELINE_WINDOW,
    apply_baseline=False,
    filter_window=FILTER_WINDOW,
)

preprocessing subjects:   0%|          | 0/33 [00:00<?, ?it/s]

2022-06-17 12:51:45 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 1
2022-06-17 12:51:45 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 1
2022-06-17 12:51:48 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 3
2022-06-17 12:51:48 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 3
2022-06-17 12:51:49 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 4
2022-06-17 12:51:49 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 4
2022-06-17 12:51:50 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 5
2022-06-17 12:51:50 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 5
2022-06-17 12:51:51 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 6
2022-06-17 12:51:51 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 6
2022-06-17 12:51:52 |[36m DEBUG    [0m

  raw = raw.interpolate_bads()


2022-06-17 12:52:01 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 17
2022-06-17 12:52:01 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 17
2022-06-17 12:52:02 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 18
2022-06-17 12:52:02 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 18
2022-06-17 12:52:03 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 19
2022-06-17 12:52:03 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 19
2022-06-17 12:52:04 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 20
2022-06-17 12:52:04 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 20
2022-06-17 12:52:05 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 21
2022-06-17 12:52:05 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 21
2022-06-17 12:52:06 |[36m DEB

to_epochs:   0%|          | 0/33 [00:00<?, ?it/s]

2022-06-17 12:52:22 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 1
2022-06-17 12:52:23 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 3
2022-06-17 12:52:23 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 4
2022-06-17 12:52:24 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 5
2022-06-17 12:52:24 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 6
2022-06-17 12:52:24 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 8
2022-06-17 12:52:25 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 10
2022-06-17 12:52:25 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 11
2022-06-17 12:52:26 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 12
2022-06-17 12:52:26 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 13
2022-06-17 12:52:26 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 14
2022-06-17 12:52:27 |[36m DEBUG    [0m| mfn400.adapters | to_epochs f

preparing ERP df:   0%|          | 0/33 [00:00<?, ?it/s]

In [6]:
# Inspect the ERP table: one row per (subject_idx, index) pair.
erp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,segment_idx,sentence_idx,word_idx,Word,onset,offset,Order,LogFreq,SndPower,Length,...,57_baseline,58_baseline,59_baseline,60_baseline,61_baseline,VEOG_baseline,Aux5_baseline,29_baseline,AUD,AUD_baseline
subject_idx,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,0,1,1,1,Alice,3.772000,4.334721,1,8.65,3.621500e-07,0.562721,...,-1.475362,1.018221,3.639740,9.178524,11.813051,2.225084e+06,58973.355255,-1.739714,,
1,1,1,1,2,was,4.288721,4.556543,2,14.56,3.843500e-09,0.267822,...,-2.079682,-7.315422,-9.789070,-12.821799,-13.976495,2.277766e+06,317729.892392,-1.710223,,
1,2,1,1,3,beginning,4.510543,5.028929,3,10.69,3.686500e-09,0.518386,...,-1.403047,-2.836003,-1.997360,2.239624,3.279069,2.278773e+06,264685.650813,-0.460853,,
1,3,1,1,4,to,4.982929,5.124925,4,16.35,3.969700e-09,0.141996,...,-2.823259,-0.355007,2.651694,1.328021,0.341111,2.285470e+06,810552.359796,-1.014870,,
1,4,1,1,5,get,5.078925,5.388327,5,13.79,3.774700e-09,0.309402,...,1.697060,0.866455,2.703942,3.773767,5.006329,2.282099e+06,503885.421098,-0.910472,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48,2124,12,84,6,happens,723.404881,723.850976,2146,10.77,7.081200e-04,0.446095,...,-10.273847,-1.866075,-3.578832,0.135192,-0.699606,1.431503e+06,,-4.888292,-0.697687,-693335.335629
48,2125,12,84,7,when,723.856452,724.069881,2147,13.76,3.221100e-03,0.213429,...,-4.045046,-9.917142,-10.957989,-15.414474,-18.292680,1.441092e+06,,2.521327,-0.693801,-697424.848874
48,2126,12,84,8,one,724.075357,724.237500,2148,14.17,1.984200e-03,0.162143,...,0.707102,-1.286376,-0.940810,-6.454655,-8.868356,1.442091e+06,,0.059119,-0.700623,-695218.529187
48,2127,12,84,9,eats,724.242976,724.500901,2149,8.15,2.473000e-05,0.257925,...,3.356819,4.949933,9.308267,7.452277,9.631524,1.446945e+06,,4.998286,-0.696291,-694445.387990


In [7]:
# Average the response over the configured N400 electrodes (N400_ELECTRODES),
# giving one "n400" value per (subject, sentence, word).
#
# Only electrodes that actually exist as columns of `erp_df` are used. They are
# kept in an ordered, de-duplicated *list*: pandas >= 2.0 raises a TypeError
# when a set is used as a `.loc` indexer, and a list keeps the column order
# deterministic between runs.
n400_electrodes = [
    el for el in dict.fromkeys(N400_ELECTRODES) if el in erp_df.columns
]

# `reset_index()` already returns a new frame, so `erp_df` itself is left
# untouched and no explicit copy is needed.
our_erp_df = (
    erp_df.reset_index()
    .set_index(["subject_idx", "sentence_idx", "word_idx"])
    .loc[:, n400_electrodes]
    .mean(axis=1)
    .rename("n400")
    .to_frame()
)
our_erp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n400
subject_idx,sentence_idx,word_idx,Unnamed: 3_level_1
1,1,1,3.250829
1,1,2,-7.932609
1,1,3,-1.355251
1,1,4,4.594125
1,1,5,3.111191
...,...,...,...
48,84,6,-11.019600
48,84,7,5.684939
48,84,8,2.004746
48,84,9,-6.014724


In [8]:
# Mean of the `<electrode>_baseline` columns over the selected N400
# electrodes, keyed by (subject, sentence, word).
# Despite its name, `our_baseline_df` is a Series named "our_baseline".
baseline_columns = [f"{el}_baseline" for el in n400_electrodes]
erp_by_word = erp_df.reset_index().set_index(
    ["subject_idx", "sentence_idx", "word_idx"]
)
our_baseline_df = (
    erp_by_word.loc[:, baseline_columns].mean(axis=1).rename("our_baseline")
)
our_baseline_df

subject_idx  sentence_idx  word_idx
1            1             1           -3.158778
                           2           -1.190170
                           3            1.882376
                           4           -2.749471
                           5            1.429668
                                         ...    
48           84            6           -1.562412
                           7          -17.022955
                           8            2.101324
                           9            7.551671
                           10           8.068166
Name: our_baseline, Length: 69561, dtype: float64

In [9]:
# Attach the per-word "n400" and "our_baseline" values to the full ERP table.
# Both right-hand objects are indexed by (subject, sentence, word), so the
# matching columns on the left are joined against the right index.
word_keys = ["subject_idx", "sentence_idx", "word_idx"]
merged_df = (
    erp_df.reset_index()
    .merge(our_erp_df, left_on=word_keys, right_index=True)
    .merge(our_baseline_df, left_on=word_keys, right_index=True)
)
merged_df

Unnamed: 0,subject_idx,index,segment_idx,sentence_idx,word_idx,Word,onset,offset,Order,LogFreq,...,59_baseline,60_baseline,61_baseline,VEOG_baseline,Aux5_baseline,29_baseline,AUD,AUD_baseline,n400,our_baseline
0,1,0,1,1,1,Alice,3.772000,4.334721,1,8.65,...,3.639740,9.178524,11.813051,2.225084e+06,58973.355255,-1.739714,,,3.250829,-3.158778
1,1,1,1,1,2,was,4.288721,4.556543,2,14.56,...,-9.789070,-12.821799,-13.976495,2.277766e+06,317729.892392,-1.710223,,,-7.932609,-1.190170
2,1,2,1,1,3,beginning,4.510543,5.028929,3,10.69,...,-1.997360,2.239624,3.279069,2.278773e+06,264685.650813,-0.460853,,,-1.355251,1.882376
3,1,3,1,1,4,to,4.982929,5.124925,4,16.35,...,2.651694,1.328021,0.341111,2.285470e+06,810552.359796,-1.014870,,,4.594125,-2.749471
4,1,4,1,1,5,get,5.078925,5.388327,5,13.79,...,2.703942,3.773767,5.006329,2.282099e+06,503885.421098,-0.910472,,,3.111191,1.429668
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69556,48,2124,12,84,6,happens,723.404881,723.850976,2146,10.77,...,-3.578832,0.135192,-0.699606,1.431503e+06,,-4.888292,-0.697687,-693335.335629,-11.019600,-1.562412
69557,48,2125,12,84,7,when,723.856452,724.069881,2147,13.76,...,-10.957989,-15.414474,-18.292680,1.441092e+06,,2.521327,-0.693801,-697424.848874,5.684939,-17.022955
69558,48,2126,12,84,8,one,724.075357,724.237500,2148,14.17,...,-0.940810,-6.454655,-8.868356,1.442091e+06,,0.059119,-0.700623,-695218.529187,2.004746,2.101324
69559,48,2127,12,84,9,eats,724.242976,724.500901,2149,8.15,...,9.308267,7.452277,9.631524,1.446945e+06,,4.998286,-0.696291,-694445.387990,-6.014724,7.551671


In [10]:
# Persist the merged table as CSV. The path is relative, so the file is
# written to the notebook's current working directory.
merged_df.to_csv(path_or_buf="brennan_erp_n400.csv")

---

In [11]:
# Scratch check: the averaged baseline divided by 1000.
# NOTE(review): the reason for the 1e3 factor is not documented here
# (possibly a unit conversion) — confirm or remove.
merged_df["our_baseline"] / 1e3

0       -0.003159
1       -0.001190
2        0.001882
3       -0.002749
4        0.001430
           ...   
69556   -0.001562
69557   -0.017023
69558    0.002101
69559    0.007552
69560    0.008068
Name: our_baseline, Length: 69561, dtype: float64