This notebook uses our own EEG library to reproduce the N400 effect in the naturalistic dataset of Brennan et al. 2018.

In [10]:
import logging
logging.basicConfig(level=logging.DEBUG, handlers=[])
from pathlib import Path

import mne
mne.set_log_level(logging.WARNING)
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

basedir = Path("..").resolve()
import sys
sys.path.append(str(basedir))

import os

# Root directory of the Brennan et al. (2018) dataset. Defaults to the shared
# cluster copy; set the BRENNAN2018_DATA_DIR environment variable to point at a
# different copy without editing the notebook.
DATA_DIR = Path(os.environ.get("BRENNAN2018_DATA_DIR",
                               "/om/data/public/language-eeg/brennan2018-v2"))

# Passed as `filter_window` to `to_erp`.
# NOTE(review): presumably band-pass edges in Hz -- confirm against the library.
FILTER_WINDOW = (0.5, 20)

# Windows passed to `to_erp`.
# NOTE(review): presumably seconds relative to word onset -- confirm.
EPOCH_WINDOW = (-0.1, 0.924)
TEST_WINDOW = (0.3, 0.5)
BASELINE_WINDOW = (None, 0)

# TODO: What is right for this montage?
# N400_ELECTRODES = ["1", "14", "24", "25", "26", "29", "30", "31", "41", "42", "44", "45"]
# Picking some central anterior sensors here
N400_ELECTRODES = ["45", "34", "35", "1"]

In [11]:
# Enable IPython's autoreload extension so edits to imported project modules are
# picked up without restarting the kernel. Re-running this cell in a live kernel
# only prints an "already loaded" notice, which is harmless.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
from mfn400.adapters.brennan2018 import BrennanDatasetAdapter

In [13]:
# Load the dataset rooted at DATA_DIR. The recorded log shows subjects being
# loaded one at a time, so this cell takes a while on a fresh kernel.
data = BrennanDatasetAdapter(DATA_DIR)

loading subject data:   0%|          | 0/33 [00:00<?, ?it/s]

2022-05-19 12:04:25 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 1
2022-05-19 12:04:27 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 3
2022-05-19 12:04:30 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 4
2022-05-19 12:04:32 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 5
2022-05-19 12:04:35 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 6
2022-05-19 12:04:37 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 8
2022-05-19 12:04:39 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 10
2022-05-19 12:04:42 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 11
2022-05-19 12:04:44 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 12
2022-05-19 12:04:46 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 13
2022-05-19 12:04:49 |[36m DEBUG    [0m| mfn400.adapters.brennan2018 | Loading subject 14
2022-

In [15]:
# Build the per-word ERP table from the epoch, test and baseline windows.
# NOTE(review): BASELINE_WINDOW is passed while apply_baseline=False; the resulting
# table has separate `<channel>_baseline` columns, so presumably the baseline is
# reported alongside the response rather than subtracted -- confirm in the library.
# NOTE(review): a recorded run of this cell ended in
# "RuntimeError: Event time samples were not unique" (see the output below); the
# later cells appear to rely on an `erp_df` from a different execution.
erp_df = data.to_erp(EPOCH_WINDOW, TEST_WINDOW, BASELINE_WINDOW, apply_baseline=False,
                     filter_window=FILTER_WINDOW)

to_epochs:   0%|          | 0/33 [00:00<?, ?it/s]

2022-05-19 12:11:34 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 1
2022-05-19 12:11:35 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 3
2022-05-19 12:11:35 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 4
2022-05-19 12:11:36 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 5
2022-05-19 12:11:36 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 6
2022-05-19 12:11:37 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 8
2022-05-19 12:11:38 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 10
2022-05-19 12:11:38 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 11
2022-05-19 12:11:39 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 12
2022-05-19 12:11:39 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 13
2022-05-19 12:11:40 |[36m DEBUG    [0m| mfn400.adapters | to_epochs for subject 14
2022-05-19 12:11:41 |[36m DEBUG    [0m| mfn400.adapters | to_epochs f

RuntimeError: Event time samples were not unique. Consider setting the `event_repeated` parameter."

In [34]:
# Inspect the ERP table; rows are indexed by (subject_idx, index) and the display
# is truncated to the first and last few rows.
erp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,segment_idx,sentence_idx,word_idx,Word,onset,offset,Order,LogFreq,SndPower,Length,...,57_baseline,58_baseline,59_baseline,60_baseline,61_baseline,VEOG_baseline,Aux5_baseline,29_baseline,AUD,AUD_baseline
subject_idx,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,0,1,1,1,Alice,3.772000,4.334721,1,8.65,3.621500e-07,0.562721,...,-1.475362,1.018221,3.639740,9.178524,11.813051,2.225084e+06,58973.355255,-1.739714,,
1,1,1,1,2,was,4.288721,4.556543,2,14.56,3.843500e-09,0.267822,...,-2.079682,-7.315422,-9.789070,-12.821799,-13.976495,2.277766e+06,317729.892392,-1.710223,,
1,2,1,1,3,beginning,4.510543,5.028929,3,10.69,3.686500e-09,0.518386,...,-1.403047,-2.836003,-1.997360,2.239624,3.279069,2.278773e+06,264685.650813,-0.460853,,
1,3,1,1,4,to,4.982929,5.124925,4,16.35,3.969700e-09,0.141996,...,-2.823259,-0.355007,2.651694,1.328021,0.341111,2.285470e+06,810552.359796,-1.014870,,
1,4,1,1,5,get,5.078925,5.388327,5,13.79,3.774700e-09,0.309402,...,1.697060,0.866455,2.703942,3.773767,5.006329,2.282099e+06,503885.421098,-0.910472,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,2124,12,84,6,happens,730.972874,731.418969,2146,10.77,7.081200e-04,0.446095,...,0.240526,-3.088103,-7.457530,-9.936517,-13.762729,7.868602e+05,,-0.549972,0.038277,66069.773672
13,2125,12,84,7,when,731.424445,731.637874,2147,13.76,3.221100e-03,0.213429,...,-1.578495,0.703140,2.958282,0.676569,-2.112208,7.902282e+05,,0.731157,0.063910,40047.566133
13,2126,12,84,8,one,731.643350,731.805493,2148,14.17,1.984200e-03,0.162143,...,7.339651,14.289617,18.462127,21.911430,22.538949,7.906945e+05,,2.550683,0.018782,53636.747467
13,2127,12,84,9,eats,731.810969,732.068894,2149,8.15,2.473000e-05,0.257925,...,-2.539556,-1.303939,-3.380252,-4.360008,-1.458582,7.907120e+05,,-0.879637,0.046487,58484.182657


In [35]:
# Average the response over the electrodes configured in N400_ELECTRODES, giving
# one `n400` value per (subject_idx, sentence_idx, word_idx).
#
# Electrodes missing from this montage are skipped. The selection is kept as a
# de-duplicated list in configuration order: a `set` has no stable order, and
# newer pandas versions deprecate and then reject sets as `.loc` indexers.
n400_electrodes = [el for el in dict.fromkeys(N400_ELECTRODES) if el in erp_df.columns]
if not n400_electrodes:
    # Averaging over zero columns would silently produce an all-NaN `n400` column.
    raise ValueError(f"None of the N400 electrodes {N400_ELECTRODES} are columns of erp_df")

# `reset_index()` already returns a new frame, so no defensive copy is needed.
our_erp_df = (
    erp_df.reset_index()
    .set_index(["subject_idx", "sentence_idx", "word_idx"])
    .loc[:, n400_electrodes]
    .mean(axis=1)
    .rename("n400")
    .to_frame()
)
our_erp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n400
subject_idx,sentence_idx,word_idx,Unnamed: 3_level_1
1,1,1,0.995938
1,1,2,-10.070998
1,1,3,1.359659
1,1,4,9.355550
1,1,5,5.557713
...,...,...,...
13,84,6,-1.626823
13,84,7,-6.467515
13,84,8,-1.872713
13,84,9,4.472840


In [36]:
# Mean of the `<electrode>_baseline` columns over the selected N400 electrodes,
# as a Series named "our_baseline" keyed by (subject_idx, sentence_idx, word_idx).
our_baseline_df = (
    erp_df.reset_index()
    .set_index(["subject_idx", "sentence_idx", "word_idx"])
    .loc[:, [f"{el}_baseline" for el in n400_electrodes]]
    .mean(axis=1)
    .rename("our_baseline")
)
our_baseline_df

subject_idx  sentence_idx  word_idx
1            1             1           -0.135712
                           2           -8.749145
                           3            2.767415
                           4            1.790051
                           5            2.096851
                                         ...    
13           84            6           -5.934142
                           7           -3.052708
                           8           15.778101
                           9            0.562239
                           10          -3.441129
Name: our_baseline, Length: 21290, dtype: float64

In [37]:
# Attach the per-word electrode averages (`n400`, `our_baseline`) to the full ERP
# table. Both aggregates are indexed by (subject_idx, sentence_idx, word_idx).
#
# `validate="one_to_one"` makes pandas raise if that key is not unique on either
# side; without it, duplicate keys would silently multiply rows in the result.
merge_keys = ["subject_idx", "sentence_idx", "word_idx"]
merged_df = pd.merge(erp_df.reset_index(), our_erp_df,
                     left_on=merge_keys, right_index=True,
                     how="inner", validate="one_to_one")
merged_df = pd.merge(merged_df, our_baseline_df,
                     left_on=merge_keys, right_index=True,
                     how="inner", validate="one_to_one")
merged_df

Unnamed: 0,subject_idx,index,segment_idx,sentence_idx,word_idx,Word,onset,offset,Order,LogFreq,...,59_baseline,60_baseline,61_baseline,VEOG_baseline,Aux5_baseline,29_baseline,AUD,AUD_baseline,n400,our_baseline
0,1,0,1,1,1,Alice,3.772000,4.334721,1,8.65,...,3.639740,9.178524,11.813051,2.225084e+06,58973.355255,-1.739714,,,0.995938,-0.135712
1,1,1,1,1,2,was,4.288721,4.556543,2,14.56,...,-9.789070,-12.821799,-13.976495,2.277766e+06,317729.892392,-1.710223,,,-10.070998,-8.749145
2,1,2,1,1,3,beginning,4.510543,5.028929,3,10.69,...,-1.997360,2.239624,3.279069,2.278773e+06,264685.650813,-0.460853,,,1.359659,2.767415
3,1,3,1,1,4,to,4.982929,5.124925,4,16.35,...,2.651694,1.328021,0.341111,2.285470e+06,810552.359796,-1.014870,,,9.355550,1.790051
4,1,4,1,1,5,get,5.078925,5.388327,5,13.79,...,2.703942,3.773767,5.006329,2.282099e+06,503885.421098,-0.910472,,,5.557713,2.096851
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21285,13,2124,12,84,6,happens,730.972874,731.418969,2146,10.77,...,-7.457530,-9.936517,-13.762729,7.868602e+05,,-0.549972,0.038277,66069.773672,-1.626823,-5.934142
21286,13,2125,12,84,7,when,731.424445,731.637874,2147,13.76,...,2.958282,0.676569,-2.112208,7.902282e+05,,0.731157,0.063910,40047.566133,-6.467515,-3.052708
21287,13,2126,12,84,8,one,731.643350,731.805493,2148,14.17,...,18.462127,21.911430,22.538949,7.906945e+05,,2.550683,0.018782,53636.747467,-1.872713,15.778101
21288,13,2127,12,84,9,eats,731.810969,732.068894,2149,8.15,...,-3.380252,-4.360008,-1.458582,7.907120e+05,,-0.879637,0.046487,58484.182657,4.472840,0.562239


In [38]:
# Persist the merged table. The path is relative, so the file lands in the
# notebook's current working directory; with pandas' default settings the row
# index is written as the first (unnamed) column.
merged_df.to_csv("brennan_erp_n400.csv")

---

In [13]:
# Show the averaged baseline divided by 1000.
# NOTE(review): the recorded output has 8516 rows, unlike the 21290-row baseline
# Series shown earlier -- presumably from a different run or dataset; confirm.
merged_df["our_baseline"] / 1e3

0      -2.645913
1      -2.654308
2      -2.653121
3      -2.657983
4      -2.662320
          ...   
8511    1.174708
8512    1.170416
8513    1.172266
8514    1.170332
8515    1.172322
Name: our_baseline, Length: 8516, dtype: float64