This notebook uses our own EEG library (`mfn400`) to reproduce the N400 effect in the naturalistic EEG dataset of Brennan et al. (2018).

In [1]:
from pathlib import Path

import mne
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# Put the parent directory on the import path; presumably this is what makes the
# local `mfn400` package importable further down — confirm against the repo layout.
import sys

basedir = Path("..").resolve()
# Guard the append so that re-running this cell does not stack duplicate entries.
if str(basedir) not in sys.path:
    sys.path.append(str(basedir))

# Root of the Brennan et al. 2018 dataset handed to the dataset adapter.
# NOTE(review): this is an absolute, machine-specific path; adjust when running elsewhere.
DATA_DIR = Path("/om/data/public/language-eeg/brennan2018-v2")

# Windows forwarded to `data.to_erp` as (start, end) pairs.
# NOTE(review): presumably seconds relative to word onset — confirm in the adapter.
EPOCH_WINDOW = (-0.1, 0.924)
TEST_WINDOW = (0.3, 0.5)
# NOTE(review): `None` presumably means "from the start of the epoch" — confirm.
BASELINE_WINDOW = (None, 0)

# Electrode labels averaged together to form the N400 measure (as given in the paper).
N400_ELECTRODES = ["1", "14", "24", "25", "26", "29", "30", "31", "41", "42", "44", "45"]

In [2]:
# Enable autoreload so that edits to imported modules are picked up live,
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from mfn400.adapters.brennan2018 import BrennanDatasetAdapter

In [4]:
# Instantiate the dataset adapter on DATA_DIR. Per the log output below, this
# opens and reads each subject's `*_alice-raw.fif` recording.
data = BrennanDatasetAdapter(DATA_DIR)

1
Opening raw data file /om/data/public/language-eeg/brennan2018-v2/eeg/S01/S01_alice-raw.fif...
    Range : 0 ... 366524 =      0.000 ...   733.048 secs
Ready.
Reading 0 ... 366524  =      0.000 ...   733.048 secs...
3
Opening raw data file /om/data/public/language-eeg/brennan2018-v2/eeg/S03/S03_alice-raw.fif...
    Range : 0 ... 367299 =      0.000 ...   734.598 secs
Ready.
Reading 0 ... 367299  =      0.000 ...   734.598 secs...
4
Opening raw data file /om/data/public/language-eeg/brennan2018-v2/eeg/S04/S04_alice-raw.fif...
    Range : 0 ... 368449 =      0.000 ...   736.898 secs
Ready.
Reading 0 ... 368449  =      0.000 ...   736.898 secs...
5
Opening raw data file /om/data/public/language-eeg/brennan2018-v2/eeg/S05/S05_alice-raw.fif...
    Range : 0 ... 372824 =      0.000 ...   745.648 secs
Ready.
Reading 0 ... 372824  =      0.000 ...   745.648 secs...


In [5]:
# Build the per-word ERP table from the loaded recordings using the epoch, test
# and baseline windows defined in the first cell. The resulting frame (displayed
# in the next cell) is indexed by (subject_idx, index) and carries per-electrode
# columns plus matching `<electrode>_baseline` columns.
# NOTE(review): apply_baseline=False presumably leaves the values uncorrected so
# the baseline can be exported separately — confirm in the adapter.
erp_df = data.to_erp(EPOCH_WINDOW, TEST_WINDOW, BASELINE_WINDOW, apply_baseline=False)

In [6]:
# Display the ERP table (the rendering below is truncated in both rows and columns).
erp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,segment_idx,sentence_idx,word_idx,Word,onset,offset,Order,LogFreq,SndPower,Length,...,57_baseline,58_baseline,59_baseline,60_baseline,61_baseline,VEOG_baseline,Aux5_baseline,29_baseline,AUD,AUD_baseline
subject_idx,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,0,1,1,1,Alice,3.772000,4.334721,1,8.65,3.621500e-07,0.562721,...,3507.052201,2610.484563,1595.795709,-17880.721967,4205.606540,2.225084e+06,58973.355255,3206.484367,,
1,1,1,1,2,was,4.288721,4.556543,2,14.56,3.843500e-09,0.267822,...,3508.214542,2606.538065,1589.804559,-17972.745257,4186.587755,2.277766e+06,317729.892392,3205.775502,,
1,2,1,1,3,beginning,4.510543,5.028929,3,10.69,3.686500e-09,0.518386,...,3510.206335,2614.254346,1602.255150,-17932.030098,4208.680215,2.278773e+06,264685.650813,3206.945417,,
1,3,1,1,4,to,4.982929,5.124925,4,16.35,3.969700e-09,0.141996,...,3511.248362,2621.625176,1615.212692,-17960.946276,4211.101316,2.285470e+06,810552.359796,3206.349896,,
1,4,1,1,5,get,5.078925,5.388327,5,13.79,3.774700e-09,0.309402,...,3516.770794,2624.086533,1617.735535,-17966.838115,4219.185031,2.282099e+06,503885.421098,3206.764284,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,2124,12,84,6,happens,739.188877,739.634972,2146,10.77,7.081200e-04,0.446095,...,-9527.710559,-9226.858251,4335.738308,9109.436991,-13670.721329,-1.032740e+06,,1035.565654,,
5,2125,12,84,7,when,739.640448,739.853877,2147,13.76,3.221100e-03,0.213429,...,-9530.863067,-9233.461583,4328.939014,9108.675717,-13673.222496,-1.031690e+06,,1036.026796,,
5,2126,12,84,8,one,739.859353,740.021496,2148,14.17,1.984200e-03,0.162143,...,-9527.782417,-9226.123035,4332.842050,9114.660450,-13676.379643,-1.033115e+06,,1037.283524,,
5,2127,12,84,9,eats,740.026972,740.284897,2149,8.15,2.473000e-05,0.257925,...,-9529.920094,-9229.648274,4329.994020,9106.164031,-13676.961226,-1.033589e+06,,1034.266708,,


In [7]:
# Average over N400 electrodes as given in paper.
#
# The electrodes are kept as an ordered list rather than a set: pandas no longer
# accepts a set as a `.loc` indexer (deprecated in 1.5, TypeError from 2.0), and
# a fixed column order keeps the row-wise mean reproducible across kernels.
n400_electrodes = [el for el in N400_ELECTRODES if el in erp_df.columns]

# Report electrodes that are absent from the table instead of dropping them
# silently; the average below is then taken over fewer channels than listed.
missing_electrodes = [el for el in N400_ELECTRODES if el not in erp_df.columns]
if missing_electrodes:
    print(f"N400 electrodes missing from erp_df: {missing_electrodes}")

# One row per (subject, sentence, word); single column "n400" holding the mean
# across the selected electrode columns.
our_erp_df = (
    erp_df.reset_index()
    .set_index(["subject_idx", "sentence_idx", "word_idx"])
    .loc[:, n400_electrodes]
    .mean(axis=1)
    .to_frame("n400")
)
our_erp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n400
subject_idx,sentence_idx,word_idx,Unnamed: 3_level_1
1,1,1,-2652.885250
1,1,2,-2659.668562
1,1,3,-2659.656583
1,1,4,-2658.386092
1,1,5,-2657.064356
...,...,...,...
5,84,6,1172.318339
5,84,7,1170.580821
5,84,8,1171.786479
5,84,9,1173.387554


In [9]:
# Mean of the `<electrode>_baseline` columns across the N400 electrodes,
# keyed by (subject, sentence, word) and named "our_baseline".
baseline_columns = [f"{el}_baseline" for el in n400_electrodes]
our_baseline_df = (
    erp_df.reset_index()
    .set_index(["subject_idx", "sentence_idx", "word_idx"])
    .loc[:, baseline_columns]
    .mean(axis=1)
    .rename("our_baseline")
)
our_baseline_df

subject_idx  sentence_idx  word_idx
1            1             1          -2645.913371
                           2          -2654.308097
                           3          -2653.121294
                           4          -2657.982869
                           5          -2662.319668
                                          ...     
5            84            6           1174.708027
                           7           1170.415761
                           8           1172.266441
                           9           1170.331628
                           10          1172.322336
Name: our_baseline, Length: 8516, dtype: float64

In [10]:
# Attach the electrode-averaged "n400" and "our_baseline" values to the full
# per-word table by matching on (subject, sentence, word).
merge_keys = ["subject_idx", "sentence_idx", "word_idx"]
merged_df = (
    erp_df.reset_index()
    .merge(our_erp_df, left_on=merge_keys, right_index=True)
    .merge(our_baseline_df, left_on=merge_keys, right_index=True)
)
merged_df

Unnamed: 0,subject_idx,index,segment_idx,sentence_idx,word_idx,Word,onset,offset,Order,LogFreq,...,59_baseline,60_baseline,61_baseline,VEOG_baseline,Aux5_baseline,29_baseline,AUD,AUD_baseline,n400,our_baseline
0,1,0,1,1,1,Alice,3.772000,4.334721,1,8.65,...,1595.795709,-17880.721967,4205.606540,2.225084e+06,58973.355255,3206.484367,,,-2652.885250,-2645.913371
1,1,1,1,1,2,was,4.288721,4.556543,2,14.56,...,1589.804559,-17972.745257,4186.587755,2.277766e+06,317729.892392,3205.775502,,,-2659.668562,-2654.308097
2,1,2,1,1,3,beginning,4.510543,5.028929,3,10.69,...,1602.255150,-17932.030098,4208.680215,2.278773e+06,264685.650813,3206.945417,,,-2659.656583,-2653.121294
3,1,3,1,1,4,to,4.982929,5.124925,4,16.35,...,1615.212692,-17960.946276,4211.101316,2.285470e+06,810552.359796,3206.349896,,,-2658.386092,-2657.982869
4,1,4,1,1,5,get,5.078925,5.388327,5,13.79,...,1617.735535,-17966.838115,4219.185031,2.282099e+06,503885.421098,3206.764284,,,-2657.064356,-2662.319668
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8511,5,2124,12,84,6,happens,739.188877,739.634972,2146,10.77,...,4335.738308,9109.436991,-13670.721329,-1.032740e+06,,1035.565654,,,1172.318339,1174.708027
8512,5,2125,12,84,7,when,739.640448,739.853877,2147,13.76,...,4328.939014,9108.675717,-13673.222496,-1.031690e+06,,1036.026796,,,1170.580821,1170.415761
8513,5,2126,12,84,8,one,739.859353,740.021496,2148,14.17,...,4332.842050,9114.660450,-13676.379643,-1.033115e+06,,1037.283524,,,1171.786479,1172.266441
8514,5,2127,12,84,9,eats,740.026972,740.284897,2149,8.15,...,4329.994020,9106.164031,-13676.961226,-1.033589e+06,,1034.266708,,,1173.387554,1170.331628


In [11]:
# Export the merged per-word table. The path is relative, so the file lands in
# the notebook's current working directory.
merged_df.to_csv("brennan_erp_n400.csv")

---

In [13]:
# Quick look at the averaged baseline values scaled down by a factor of 1000.
merged_df["our_baseline"] / 1e3

0      -2.645913
1      -2.654308
2      -2.653121
3      -2.657983
4      -2.662320
          ...   
8511    1.174708
8512    1.170416
8513    1.172266
8514    1.170332
8515    1.172322
Name: our_baseline, Length: 8516, dtype: float64