## Import packages and define functions

In [None]:
%matplotlib qt
import mne
import numpy as np
import pandas as pd
import scipy.stats as ss
import matplotlib.pyplot as plt
from artifact_blocking import run_ab
from statsmodels.stats.multitest import fdrcorrection

In [None]:
def preprocess(filepath, evcode_files):
    """Load one EGI recording and return cleaned, baseline-corrected epochs.

    Steps, in order: read the raw file, find trigger events, keep channels
    E1-E128, keep only events with trigger code 1 and re-code them from the
    CSV files, .5-40 Hz FIR band-pass, artifact blocking, epoching from
    -0.8 s to 1.6 s, average reference, baseline correction over the
    pre-trigger interval, and peak-to-peak epoch rejection at 200 uV.

    Parameters
    ----------
    filepath : str
        Path of the recording passed to ``mne.io.read_raw_egi``.
    evcode_files : iterable of str
        Header-less CSV files holding the new event codes. Files are read
        in the given order and flattened row by row; the resulting codes
        are assigned one-to-one to the code-1 triggers.

    Returns
    -------
    The epochs object returned by the final ``drop_bad`` call.

    Raises
    ------
    ValueError
        If the number of codes read from ``evcode_files`` differs from the
        number of code-1 triggers found in the recording.
    """
    # Load the EEG file and extract trigger times (done before channel
    # selection, while every channel of the recording is still present)
    raw = mne.io.read_raw_egi(filepath, preload=True)
    events = mne.find_events(raw)

    # Keep channels E1-E128
    channels = ['E%s' % i for i in range(1, 129)]
    raw = raw.pick_channels(channels)

    # Select only events with trigger code 1
    events = events[events[:, 2] == 1]

    # Re-code events using codes found in evcode_files
    evcodes = np.concatenate(
        [pd.read_csv(f, header=None) for f in evcode_files]
    ).flatten()
    # Without this check a single code would be silently broadcast to every
    # event, and any other mismatch would fail with an opaque numpy error.
    if len(evcodes) != len(events):
        raise ValueError(
            'Found %d event codes in evcode_files but %d code-1 triggers in %s'
            % (len(evcodes), len(events), filepath)
        )
    events[:, 2] = evcodes

    # .5-40 Hz bandpass FIR filter
    raw = raw.filter(
        .5,
        40,
        method='fir',
        fir_design='firwin',
        l_trans_bandwidth='auto',
        h_trans_bandwidth='auto',
        filter_length='auto',
        fir_window='hamming',
        phase='zero',
        pad='reflect_limited',
        picks=None,
    )

    # Clean data using artifact blocking
    # NOTE(review): the return value is ignored, so run_ab presumably modifies
    # the recording in place - confirm against artifact_blocking.
    run_ab(raw, threshold=75, method='window')

    # Epoch data around the trigger pulses in the EEG recording
    epochs = mne.Epochs(raw, events, tmin=-.8, tmax=1.6, preload=True)

    # Apply common average reference
    epochs = epochs.set_eeg_reference(ref_channels='average')

    # Baseline correct epochs from the start of each epoch up to time 0
    epochs = epochs.apply_baseline((None, 0))

    # Reject epochs using peak-to-peak amplitude (threshold given in uV,
    # converted to V for MNE)
    threshold_uv = 200
    epochs = epochs.drop_bad({'eeg': threshold_uv / 1000000.})

    return epochs

def frequency_tagging(data, sampling_rate):
    """Return a neighbour-corrected, smoothed FFT amplitude spectrum.

    For every row of ``data`` the unscaled FFT magnitude is computed along
    axis 1 and restricted to the non-negative frequencies. Each bin then has
    the mean of its two neighbouring bins subtracted (the first and last bin
    only have one neighbour), negative results are clipped to 0, and finally
    each bin is replaced by the average of itself and its adjacent bins.

    Parameters
    ----------
    data : 2-D array, rows x samples
        Signals to transform, one per row.
    sampling_rate : float
        Sampling rate of ``data``; determines the returned frequencies.

    Returns
    -------
    freqs : 1-D array
        The non-negative FFT frequencies.
    norm_amp : 2-D array, rows x len(freqs)
        Corrected and smoothed amplitudes.

    Raises
    ------
    ValueError
        If ``data`` yields fewer than two non-negative frequency bins, in
        which case no bin has a neighbour to compare against.
    """
    # Run FFT and keep the non-negative half of the spectrum
    n_samples = data.shape[1]
    freqs = np.fft.fftfreq(n_samples, d=1/sampling_rate)
    keep = freqs >= 0
    amp = np.abs(np.fft.fft(data, n=n_samples, axis=1))[:, keep]
    freqs = freqs[keep]
    if freqs.size < 2:
        raise ValueError(
            'frequency_tagging needs at least 2 non-negative frequency bins, '
            'got %d (data has %d samples per row)' % (freqs.size, n_samples)
        )

    # Subtract neighboring bins (edge bins only have a single neighbor)
    sub_amp = np.empty_like(amp)
    sub_amp[:, 0] = amp[:, 0] - amp[:, 1]
    sub_amp[:, 1:-1] = amp[:, 1:-1] - (amp[:, :-2] + amp[:, 2:]) * .5
    sub_amp[:, -1] = amp[:, -1] - amp[:, -2]
    sub_amp[sub_amp < 0] = 0  # Don't allow negative amplitudes

    # Average amplitudes across neighboring bins (two bins at the edges,
    # three everywhere else)
    norm_amp = np.zeros((sub_amp.shape[0], len(freqs)))
    norm_amp[:, 0] = (sub_amp[:, 0] + sub_amp[:, 1]) / 2
    norm_amp[:, 1:-1] = (sub_amp[:, :-2] + sub_amp[:, 1:-1] + sub_amp[:, 2:]) / 3
    norm_amp[:, -1] = (sub_amp[:, -2] + sub_amp[:, -1]) / 2

    return freqs, norm_amp
    

## Define which files belong to each participant

In [None]:
# Recording file for each participant, keyed by participant ID
eegfiles = {
    's11': 'C://Users/jpazd/Downloads/Adult_11_D.mff',
    's12': 'C://Users/jpazd/Downloads/Adult_12_D.mff',
}

# Event-code CSV files for each participant, keyed by the same participant
# IDs as eegfiles and listed in the order their codes should be concatenated
evcode_files = {
    's11': [
        'C://Users/jpazd/Downloads/Video_Trigger_files/DupWav6min2.csv',
        'C://Users/jpazd/Downloads/Video_Trigger_files/DupWav6min4.csv',
        'C://Users/jpazd/Downloads/Video_Trigger_files/DupWav6min6.csv',
    ],
    's12': [
        'C://Users/jpazd/Downloads/Video_Trigger_files/DupWav6min5.csv',
        'C://Users/jpazd/Downloads/Video_Trigger_files/DupWav6min7.csv',
        'C://Users/jpazd/Downloads/Video_Trigger_files/DupWav6min9.csv',
    ],
}

## Loop over participants

In [None]:
# ERPs will be recorded in a subject x channel x time matrix and the frequency
# tagging output in a subject x channel x frequency matrix, one per condition.
# The sizes are hard-coded: 128 channels, 2401 time points per epoch and the
# 1201 non-negative FFT bins that 2401 samples produce.
# NOTE(review): 2401 time points matches the -0.8 to 1.6 s epoch only at a
# 1000 Hz sampling rate - confirm for every recording.
erps1 = np.full((len(eegfiles), 128, 2401), np.nan)
erps2 = np.full((len(eegfiles), 128, 2401), np.nan)
ft1 = np.full((len(eegfiles), 128, 1201), np.nan)
ft2 = np.full((len(eegfiles), 128, 1201), np.nan)

# Preprocess each subject's data, calculate ERPs for them, and run frequency tagging
for i, subj in enumerate(eegfiles):
    print('PROCESSING SUBJECT %s' % subj)

    # Preprocess and epoch EEG data for the current subject
    eeg = preprocess(eegfiles[subj], evcode_files[subj])
    # Kept after the loop as the time axis for plotting
    times = eeg.times

    # Calculate subject's ERPs for each channel in each condition
    erp1 = eeg['1'].average()
    erp2 = eeg['2'].average()

    # Save subject's ERPs into overall data matrices (and convert from V to uV)
    erps1[i, :, :] = erp1.data * 1000000
    erps2[i, :, :] = erp2.data * 1000000

    # Run frequency tagging on each ERP to get its corrected amplitude
    # spectrum; each condition uses its own sampling rate
    freqs, ft1[i, :, :] = frequency_tagging(erp1.data, erp1.info['sfreq'])
    freqs, ft2[i, :, :] = frequency_tagging(erp2.data, erp2.info['sfreq'])

    # Clean up this session's variables to free up space before loading next session
    del eeg, erp1, erp2

# Save ERP and frequency tagging matrices to numpy files
np.save('C://Users/jpazd/Downloads/cond1_erps.npy', erps1)
np.save('C://Users/jpazd/Downloads/cond2_erps.npy', erps2)
np.save('C://Users/jpazd/Downloads/cond1_ft.npy', ft1)
np.save('C://Users/jpazd/Downloads/cond2_ft.npy', ft2)

## Dependent samples *t*-test to determine significant timepoints for channel E62 (Pz)

In [None]:
# Paired t-test across participants between the two conditions at channel
# index 61, giving one t and one p-value per time point
cond1_pz = erps1[:, 61, :]
cond2_pz = erps2[:, 61, :]
tvals, pvals = ss.ttest_rel(cond1_pz, cond2_pz, axis=0)

# FDR correction would normally be applied here; it is skipped for
# illustrative purposes, so the uncorrected p-values are thresholded instead
# sig, pvals_adjusted = fdrcorrection(pvals, alpha=0.05)
sig = pvals < 0.05

## Plot cross-participant average for channel E62 (Pz)

In [None]:
# Settings
condition_names = ['Condition 1', 'Condition 2']
color1 = 'C0'
color2 = 'C3'
line_opacity = 1
shade_opacity = .1

# Start a new figure so the ERP is never drawn on top of a figure that is
# still open (e.g. when this cell is re-run with the interactive backend)
plt.figure()

# Create axis lines at 0 uV and time 0
plt.axvline(0, c='k', ls='--')
plt.axhline(0, c='k', ls='--')

# Plot ERPs: across-participant mean at channel index 61 for each condition,
# with a shaded band of +/- one standard error of the mean
m1 = np.mean(erps1[:, 61, :], axis=0)
m2 = np.mean(erps2[:, 61, :], axis=0)
sem1 = ss.sem(erps1[:, 61, :], axis=0)
sem2 = ss.sem(erps2[:, 61, :], axis=0)
l1, = plt.plot(times, m1, c=color1, alpha=line_opacity)
plt.fill_between(times, m1-sem1, m1 + sem1, color=color1, alpha=shade_opacity)
l2, = plt.plot(times, m2, c=color2, alpha=line_opacity)
plt.fill_between(times, m2-sem2, m2 + sem2, color=color2, alpha=shade_opacity)

# Mark significant time points with a black dot at 4 uV
plt.scatter(times[sig], np.full(sig.sum(), 4), c='k', s=2)

# Add labels and legend
plt.xlabel('Time (s)')
plt.ylabel('Amplitude (uV)')
plt.legend([l1, l2], condition_names)

## Dependent samples *t*-test to determine significant frequencies for channel E62 (Pz)

In [None]:
# Paired t-test across participants between the two conditions at channel
# index 61, giving one t and one p-value per frequency bin
cond1_pz = ft1[:, 61, :]
cond2_pz = ft2[:, 61, :]
tvals, pvals = ss.ttest_rel(cond1_pz, cond2_pz, axis=0)

# FDR correction would normally be applied here; it is skipped for
# illustrative purposes, so the uncorrected p-values are thresholded instead
# sig, pvals_adjusted = fdrcorrection(pvals, alpha=0.05)
sig = pvals < 0.05

## Plot amplitude spectra for each condition at channel E62 (Pz)

In [None]:
# Settings
condition_names = ['Condition 1', 'Condition 2']
color1 = 'C0'
color2 = 'C3'
line_opacity = 1
shade_opacity = .1

# Start a new figure; otherwise the spectra are drawn into whichever figure
# is current, which with the interactive backend is the still-open ERP plot
plt.figure()

# Across-participant mean and standard error of the frequency tagging output
# at channel index 61 for each condition
m1 = np.mean(ft1[:, 61, :], axis=0)
m2 = np.mean(ft2[:, 61, :], axis=0)
sem1 = ss.sem(ft1[:, 61, :], axis=0)
sem2 = ss.sem(ft2[:, 61, :], axis=0)

# Plot each condition's mean with a shaded band of +/- one standard error
l1, = plt.plot(freqs, m1, c=color1, alpha=line_opacity)
plt.fill_between(freqs, m1-sem1, m1 + sem1, color=color1, alpha=shade_opacity)
l2, = plt.plot(freqs, m2, c=color2, alpha=line_opacity)
plt.fill_between(freqs, m2-sem2, m2 + sem2, color=color2, alpha=shade_opacity)

# Only show 0-40 units on the frequency axis, then add labels and legend
plt.xlim(0, 40)
plt.ylabel('Amplitude')
plt.xlabel('Frequency')
plt.legend([l1, l2], condition_names)