This notebook uses our own EEG library to reproduce the N400 effect in the naturalistic dataset of Brennan et al. 2018.

In [1]:
import logging
# Set the root logger level to DEBUG. The empty handler list means basicConfig
# installs no handler of its own.
# NOTE(review): every project DEBUG message in the outputs below is printed
# twice -- presumably a handler is being attached more than once; confirm.
logging.basicConfig(level=logging.DEBUG, handlers=[])
from pathlib import Path

import mne
# Only show MNE messages at WARNING level or above.
mne.set_log_level(logging.WARNING)
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# Append the parent of the current working directory to sys.path.
# Presumably this is the repository root that holds the `mfn400` package
# imported in a later cell -- verify against the repository layout.
basedir = Path("..").resolve()
import sys
sys.path.append(str(basedir))

import os

# Root directory of the dataset handed to BrennanDatasetAdapter.
# Defaults to the shared cluster location; set the BRENNAN2018_DATA_DIR
# environment variable to run the notebook against a copy stored elsewhere.
# An unset or empty variable falls back to the default.
DATA_DIR = Path(
    os.environ.get("BRENNAN2018_DATA_DIR")
    or "/om/data/public/language-eeg/brennan2018-v2"
)

# Passed as `filter_window` to `to_erp`.
# Presumably band-pass edges in Hz -- TODO confirm against the adapter.
FILTER_WINDOW = (0.5, 20)

# Passed positionally to `to_erp` in this order: epoch, test, baseline.
# Presumably seconds relative to word onset -- TODO confirm.
EPOCH_WINDOW = (-0.1, 0.924)
TEST_WINDOW = (0.3, 0.5)
BASELINE_WINDOW = (None, 0)

# Electrode (column) names averaged to form the N400 measure.
# TODO: What is right for this montage?
# N400_ELECTRODES = ["1", "14", "24", "25", "26", "29", "30", "31", "41", "42", "44", "45"]
# Picking some central anterior sensors here
N400_ELECTRODES = ["45", "34", "35", "1"]

In [2]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell runs, so edits to the project library take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from mfn400.adapters.brennan2018 import BrennanDatasetAdapter

In [20]:
# Build the dataset adapter over DATA_DIR. Judging by the progress bar and
# DEBUG log below, construction loads each subject's data (10 subjects here).
data = BrennanDatasetAdapter(DATA_DIR)

loading subject data:   0%|          | 0/10 [00:00<?, ?it/s]

2022-05-19 17:14:06 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 1
2022-05-19 17:14:06 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 1
2022-05-19 17:14:09 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 3
2022-05-19 17:14:09 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 3
2022-05-19 17:14:11 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 4
2022-05-19 17:14:11 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 4
2022-05-19 17:14:13 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 5
2022-05-19 17:14:13 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 5
2022-05-19 17:14:16 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 6
2022-05-19 17:14:16 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 6
2022-05-19 17:14:18 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 8
2022-05-19

In [22]:
# Build the per-word ERP table for all subjects.
# The baseline window is passed even though apply_baseline=False; the
# resulting frame carries separate `<channel>_baseline` columns, which
# later cells average.
# NOTE(review): presumably the baseline is reported but not subtracted
# from the channel values -- confirm in `to_erp`.
erp_df = data.to_erp(
    EPOCH_WINDOW,
    TEST_WINDOW,
    BASELINE_WINDOW,
    apply_baseline=False,
    filter_window=FILTER_WINDOW,
)

preprocessing subjects:   0%|          | 0/10 [00:00<?, ?it/s]

2022-05-19 17:14:38 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 1
2022-05-19 17:14:38 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 1
2022-05-19 17:14:41 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 3
2022-05-19 17:14:41 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 3
2022-05-19 17:14:42 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 4
2022-05-19 17:14:42 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 4
2022-05-19 17:14:43 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 5
2022-05-19 17:14:43 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 5
2022-05-19 17:14:45 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 6
2022-05-19 17:14:45 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Preprocessing subject 6
2022-05-19 17:14:46 |[36m DEBUG    [0m

to_epochs:   0%|          | 0/10 [00:00<?, ?it/s]

2022-05-19 17:14:51 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 1
2022-05-19 17:14:52 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 3
2022-05-19 17:14:52 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 4
2022-05-19 17:14:53 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 5
2022-05-19 17:14:53 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 6
2022-05-19 17:14:53 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 8
2022-05-19 17:14:54 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 10
2022-05-19 17:14:54 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 11
2022-05-19 17:14:55 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 12
2022-05-19 17:14:55 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 13


preparing ERP df:   0%|          | 0/10 [00:00<?, ?it/s]

In [23]:
# Inspect the per-word table. Rows are indexed by (subject_idx, index);
# each channel has a value column and a matching `<channel>_baseline` column.
erp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,segment_idx,sentence_idx,word_idx,Word,onset,offset,Order,LogFreq,SndPower,Length,...,57_baseline,58_baseline,59_baseline,60_baseline,61_baseline,VEOG_baseline,Aux5_baseline,29_baseline,AUD,AUD_baseline
subject_idx,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,0,1,1,1,Alice,3.772000,4.334721,1,8.65,3.621500e-07,0.562721,...,-0.001475,0.001018,0.003640,0.009179,0.011813,2225.083744,58.973355,-0.001740,,
1,1,1,1,2,was,4.288721,4.556543,2,14.56,3.843500e-09,0.267822,...,-0.002080,-0.007315,-0.009789,-0.012822,-0.013976,2277.766415,317.729892,-0.001710,,
1,2,1,1,3,beginning,4.510543,5.028929,3,10.69,3.686500e-09,0.518386,...,-0.001403,-0.002836,-0.001997,0.002240,0.003279,2278.773383,264.685651,-0.000461,,
1,3,1,1,4,to,4.982929,5.124925,4,16.35,3.969700e-09,0.141996,...,-0.002823,-0.000355,0.002652,0.001328,0.000341,2285.470345,810.552360,-0.001015,,
1,4,1,1,5,get,5.078925,5.388327,5,13.79,3.774700e-09,0.309402,...,0.001697,0.000866,0.002704,0.003774,0.005006,2282.099158,503.885421,-0.000910,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,2124,12,84,6,happens,730.972874,731.418969,2146,10.77,7.081200e-04,0.446095,...,0.000241,-0.003088,-0.007458,-0.009937,-0.013763,786.860189,,-0.000550,0.000038,66.069774
13,2125,12,84,7,when,731.424445,731.637874,2147,13.76,3.221100e-03,0.213429,...,-0.001578,0.000703,0.002958,0.000677,-0.002112,790.228162,,0.000731,0.000064,40.047566
13,2126,12,84,8,one,731.643350,731.805493,2148,14.17,1.984200e-03,0.162143,...,0.007340,0.014290,0.018462,0.021911,0.022539,790.694503,,0.002551,0.000019,53.636747
13,2127,12,84,9,eats,731.810969,732.068894,2149,8.15,2.473000e-05,0.257925,...,-0.002540,-0.001304,-0.003380,-0.004360,-0.001459,790.711980,,-0.000880,0.000046,58.484183


In [24]:
# Average each word's response over the electrodes listed in N400_ELECTRODES
# (an ad-hoc selection -- see the TODO next to that constant), keeping only
# those that exist as columns of erp_df.
#
# The selection is an order-preserving, de-duplicated *list* rather than a
# set: pandas >= 2.0 raises TypeError for set indexers in `.loc` (deprecated
# since 1.5), and a list keeps the column order deterministic across runs.
n400_electrodes = [
    electrode
    for electrode in dict.fromkeys(N400_ELECTRODES)
    if electrode in erp_df.columns
]
if not n400_electrodes:
    # With no matching column the row-wise mean would be all-NaN and would be
    # silently carried into the exported CSV, so fail loudly instead.
    raise ValueError(
        f"None of the N400 electrodes {N400_ELECTRODES} are columns of erp_df"
    )

# Re-key rows by (subject, sentence, word) so the result can be merged back
# onto erp_df. reset_index() already returns a new frame, so no explicit
# copy of erp_df is needed.
our_erp_df = (
    erp_df.reset_index()
    .set_index(["subject_idx", "sentence_idx", "word_idx"])
    .loc[:, n400_electrodes]
    .mean(axis=1)
    .rename("n400")
    .to_frame()
)
our_erp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n400
subject_idx,sentence_idx,word_idx,Unnamed: 3_level_1
1,1,1,0.000996
1,1,2,-0.010071
1,1,3,0.001360
1,1,4,0.009356
1,1,5,0.005558
...,...,...,...
13,84,6,-0.001627
13,84,7,-0.006468
13,84,8,-0.001873
13,84,9,0.004473


In [25]:
# Row-wise mean of the `<electrode>_baseline` columns for the selected N400
# electrodes, keyed by (subject, sentence, word).
# Despite the `_df` suffix the result is a Series named "our_baseline".
our_baseline_df = (
    erp_df
    .reset_index()
    .set_index(["subject_idx", "sentence_idx", "word_idx"])
    .loc[:, [f"{electrode}_baseline" for electrode in n400_electrodes]]
    .mean(axis=1)
    .rename("our_baseline")
)
our_baseline_df

subject_idx  sentence_idx  word_idx
1            1             1          -0.000136
                           2          -0.008749
                           3           0.002767
                           4           0.001790
                           5           0.002097
                                         ...   
13           84            6          -0.005934
                           7          -0.003053
                           8           0.015778
                           9           0.000562
                           10         -0.003441
Name: our_baseline, Length: 21290, dtype: float64

In [26]:
# Attach the electrode-averaged "n400" and "our_baseline" columns to the
# full per-word table. Both right-hand objects are indexed by
# (subject_idx, sentence_idx, word_idx), so they are joined on their index
# against the same-named columns of the flattened erp_df.
merged_df = (
    erp_df.reset_index()
    .merge(
        our_erp_df,
        left_on=["subject_idx", "sentence_idx", "word_idx"],
        right_index=True,
    )
    .merge(
        our_baseline_df,
        left_on=["subject_idx", "sentence_idx", "word_idx"],
        right_index=True,
    )
)
merged_df

Unnamed: 0,subject_idx,index,segment_idx,sentence_idx,word_idx,Word,onset,offset,Order,LogFreq,...,59_baseline,60_baseline,61_baseline,VEOG_baseline,Aux5_baseline,29_baseline,AUD,AUD_baseline,n400,our_baseline
0,1,0,1,1,1,Alice,3.772000,4.334721,1,8.65,...,0.003640,0.009179,0.011813,2225.083744,58.973355,-0.001740,,,0.000996,-0.000136
1,1,1,1,1,2,was,4.288721,4.556543,2,14.56,...,-0.009789,-0.012822,-0.013976,2277.766415,317.729892,-0.001710,,,-0.010071,-0.008749
2,1,2,1,1,3,beginning,4.510543,5.028929,3,10.69,...,-0.001997,0.002240,0.003279,2278.773383,264.685651,-0.000461,,,0.001360,0.002767
3,1,3,1,1,4,to,4.982929,5.124925,4,16.35,...,0.002652,0.001328,0.000341,2285.470345,810.552360,-0.001015,,,0.009356,0.001790
4,1,4,1,1,5,get,5.078925,5.388327,5,13.79,...,0.002704,0.003774,0.005006,2282.099158,503.885421,-0.000910,,,0.005558,0.002097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21285,13,2124,12,84,6,happens,730.972874,731.418969,2146,10.77,...,-0.007458,-0.009937,-0.013763,786.860189,,-0.000550,0.000038,66.069774,-0.001627,-0.005934
21286,13,2125,12,84,7,when,731.424445,731.637874,2147,13.76,...,0.002958,0.000677,-0.002112,790.228162,,0.000731,0.000064,40.047566,-0.006468,-0.003053
21287,13,2126,12,84,8,one,731.643350,731.805493,2148,14.17,...,0.018462,0.021911,0.022539,790.694503,,0.002551,0.000019,53.636747,-0.001873,0.015778
21288,13,2127,12,84,9,eats,731.810969,732.068894,2149,8.15,...,-0.003380,-0.004360,-0.001459,790.711980,,-0.000880,0.000046,58.484183,0.004473,0.000562


In [27]:
# Export the merged table as CSV. The path is relative, so the file lands in
# the kernel's current working directory; with pandas' defaults the row index
# is written as an unnamed first column.
merged_df.to_csv("brennan_erp_n400.csv")

---

In [13]:
# Scratch check: show the averaged baseline divided by 1e3.
# NOTE(review): the saved output is stale. It has execution count 13 and
# 8516 rows with values around 1-3 in magnitude, whereas the frame built by
# the cells above has 21290 rows and baselines on the order of 1e-3.
# Re-run after Restart & Run All, or remove this cell.
merged_df.our_baseline / 1e3

0      -2.645913
1      -2.654308
2      -2.653121
3      -2.657983
4      -2.662320
          ...   
8511    1.174708
8512    1.170416
8513    1.172266
8514    1.170332
8515    1.172322
Name: our_baseline, Length: 8516, dtype: float64