# Biosignals Experiment Demo - Experimental Economics Lecture 05.02.2024
**Lecturer:** Michael Knierim (michael.knierim@kit.edu)

# Useful Resources

## Tutorials for Biosignals Recordings & Analyses
- Setting up an OpenBCI recording with EEG & ECG: https://docs.openbci.com/GettingStarted/Biosensing-Setups/ExGSetup/
- Conduct and analyze EEG experiments with Jupyter Notebooks: https://github.com/NeuroTechX/eeg-notebooks
- Analyzing biosignal data with Jupyter Notebooks: https://github.com/biosignalsplux/biosignalsnotebooks

## Major Frameworks for Biosignal Data Analysis with Python
- EEG Signal Processing & Analysis: MNE Python: https://mne.tools/stable/index.html
- ECG (& more!) Signal Processing & Analysis: Neurokit2: https://neuropsychology.github.io/NeuroKit/introduction.html

# Notebook Setup

In [None]:
# Load dependencies
from datetime import datetime, timedelta

import mne
import neurokit2 as nk
import numpy as np
import pandas as pd
import plotly.express as px
from mne.filter import filter_data, notch_filter

# `simps` was renamed to `simpson`; the old alias was removed in SciPy 1.14
try:
    from scipy.integrate import simpson
except ImportError:  # very old SciPy that only ships `simps`
    from scipy.integrate import simps as simpson
simps = simpson  # keep the legacy name available for existing cells

# Load and Prepare the Experiment Log

In [None]:
# Names used in the PsychoPy .csv log for the three image conditions and the
# response slider. They are combined with suffixes such as '_images.started'
# to build column names, so the spelling must match exactly (case-sensitive!).
name_condition_1 = 'condition1'
name_condition_2 = 'condition2'
name_condition_3 = 'condition3'
name_response = 'slider'

# Read the PsychoPy experiment log and preview its first rows
exp_data = pd.read_csv('data/Test_demo-psychopy_2024-02-06_14h44.17.399.csv')
exp_data.head()

In [None]:
# Synchronisation anchor for the biosignal data: the moment the experiment started.
# The log stores it as text in its first 'date' entry; parse it into a datetime
# object ('%f' consumes the fractional seconds at the end).
# For explanation see: https://www.geeksforgeeks.org/how-to-use-strptime-with-milliseconds-in-python/
ts_exp_start = datetime.strptime(exp_data['date'][0], '%Y-%m-%d_%Hh%M.%S.%f')
print(f'Experiment started at: {ts_exp_start}')

def get_condition_ts(condition_name):
    """Return the absolute start time of one experiment phase.

    The log column holds the onset relative to the start of the experiment
    (seconds); it is added to the module-level `ts_exp_start`.

    Args:
        condition_name: Name of the column in the module-level `exp_data`
            log that holds the onset (e.g. 'eo_stimulus.started').

    Returns:
        `ts_exp_start` shifted by the first non-missing value of that column.

    Raises:
        KeyError: If the column does not exist in the log.
        ValueError: If the column contains no onset at all.
    """
    onsets = exp_data[condition_name].dropna()
    if onsets.empty:
        # Fail with a readable message instead of an opaque `KeyError: 0`
        raise ValueError(f"No timestamp logged in column '{condition_name}'")
    # float() hands timedelta a plain Python number regardless of the column dtype
    return ts_exp_start + timedelta(seconds=float(onsets.iloc[0]))

# Absolute start time of every experiment phase, looked up in the PsychoPy log.

# Resting phases: eyes open (EO), eyes closed (EC) and standing (S)
ts_eo_start = get_condition_ts('eo_stimulus.started')
print(type(ts_eo_start))
ts_ec_start = get_condition_ts('ec_stimulus.started')
ts_s_start = get_condition_ts('stand_stimulus.started')

# Image phases: first (here: "neutral"), second (here: "luxury"), third (here: "novelty")
ts_condition_1_start = get_condition_ts(f'{name_condition_1}_images.started')
ts_condition_2_start = get_condition_ts(f'{name_condition_2}_images.started')
ts_condition_3_start = get_condition_ts(f'{name_condition_3}_images.started')

# Collect everything in one table with a row per phase (iterated over later)
condition_times = pd.DataFrame({
    'Condition': [
        'EO', 'EC', 'S',
        name_condition_1, name_condition_2, name_condition_3,
    ],
    'Start': [
        ts_eo_start, ts_ec_start, ts_s_start,
        ts_condition_1_start, ts_condition_2_start, ts_condition_3_start,
    ],
})

# Each phase is given a fixed length of 60 seconds
condition_times = condition_times.assign(
    End=condition_times['Start'] + timedelta(seconds=60)
)

condition_times

In [None]:
# Pair the three image conditions with the logged slider responses
# (log rows without a response are dropped, the remaining ones are taken in order)
condition_responses = pd.DataFrame({
    'Condition': [name_condition_1, name_condition_2, name_condition_3],
    'Response': exp_data[f'{name_response}.response'].dropna().reset_index(drop=True),
})

# Show the responses as a bar chart, one bar per condition
fig = px.bar(
    condition_responses,
    x='Condition',
    y='Response',
    labels={'Response':'Trusting (1=Not at all, 5=Very much)'},
)
fig.show()

# Load and Process the Biosignal Data

In [None]:
# Read the raw OpenBCI recording (the first four lines of the file are skipped)
biosignals = pd.read_csv("data/OpenBCI-RAW-2024-02-06_14-44-01.txt", skiprows=4)

# Discard the columns we are not interested in (e.g. accelerometer data)
biosignals = biosignals.drop(
    columns=biosignals.filter(regex='Accel|Other|Analog|Sample').columns
)

# Rename the remaining columns after the electrode montage (choose your setup!)
biosignals.columns = [
    'F7', 'F8', 'F3', 'F4', 'T3', 'T4', 'C3', 'C4',
    'P7', 'P8', 'P3', 'P4', 'M2', 'Oz', 'Fz', 'ECG', 'TS_UNIX', 'TS',
]

# Only the numeric timestamp is used from here on
biosignals = biosignals.drop(columns='TS')

# Convert the timestamps (needed for cutting the data): scale to milliseconds,
# read them as UTC and shift them to local (Berlin) time
# See: https://www.eldvyn.com/2020/08/converting-number-from-scientific-e.html
biosignals['TS_UNIX'] = pd.to_datetime(
    biosignals['TS_UNIX'] * 1000, unit='ms', utc=True
).dt.tz_convert('Europe/Berlin')
display(biosignals)

In [None]:
# Label every sample with the experiment phase it was recorded in
# (maps the experiment-log timestamps onto the biosignal recording).
# The log timestamps are timezone-naive, so drop the timezone here as well.
biosignals_times = biosignals['TS_UNIX'].dt.tz_localize(None)

for phase in condition_times.itertuples(index=False):
    # Half-open window [Start, End): a sample exactly at End is not included
    in_phase = (biosignals_times >= phase.Start) & (biosignals_times < phase.End)
    biosignals.loc[in_phase, 'Condition'] = phase.Condition

# Optional sanity checks for the annotations
# display(biosignals['Condition'].value_counts())
# display(biosignals.groupby('Condition').head(1))
# display(biosignals.groupby('Condition').tail(1))

# Cutting: samples between conditions never received a 'Condition' value and are
# removed here. Note that dropna() removes every row containing a NaN in any column.
biosignals.dropna(inplace=True)

## General Preprocessing

In [None]:
# The last two columns ('TS_UNIX', 'Condition') are metadata; everything before
# them is a signal channel and gets processed column by column.

# Subtract each channel's own mean so that it is centred on zero
biosignals.iloc[:, :-2] = biosignals.iloc[:, :-2].apply(
    lambda channel: channel - channel.mean()
)

# FIR notch filter at 50 Hz (line noise), data sampled at 125 Hz
biosignals.iloc[:, :-2] = biosignals.iloc[:, :-2].apply(
    lambda channel: notch_filter(
        np.array(channel), Fs=125, freqs=[50],
        method='fir', copy=True, verbose='WARNING',
    )
)

## ECG Data Processing

In [None]:
# Keep only what the ECG analysis needs: the signal, its timestamps and the condition label
ecg = biosignals.loc[:, ['ECG', 'TS_UNIX', 'Condition']]

# Function to extract average heart rate (HR)
def extract_avg_hr(df, show=False, fs=125):
    """Estimate the average heart rate of one ECG segment.

    Args:
        df: DataFrame with an 'ECG' column holding the signal.
        show: If truthy, plot the detected R-peaks on top of the cleaned signal.
        fs: Sampling rate of the signal in Hz.

    Returns:
        DataFrame with a single column 'Avg_HR', derived from the mean
        beat-to-beat interval ('HRV_MeanNN') reported by NeuroKit2.
    """
    # Clean the signal and locate the R-peaks (Pan-Tompkins for both steps)
    cleaned = nk.ecg_clean(df['ECG'], sampling_rate=fs, method="pantompkins1985")
    peaks, info = nk.ecg_peaks(
        cleaned, sampling_rate=fs, method="pantompkins1985", correct_artifacts=True
    )

    # Plot the data if of interest
    if show:
        nk.events_plot(info["ECG_R_Peaks"], cleaned)

    # Time-domain HRV metrics; only the mean beat-to-beat interval is used
    hrv = nk.hrv_time(peaks, sampling_rate=fs)

    # HR = 60000 / IBI (beats per minute from the mean interval in milliseconds)
    avg_hr = 60000 / hrv['HRV_MeanNN']
    return pd.DataFrame({'Avg_HR': avg_hr})

### Plausibility Check

In [None]:
# Plausibility check: average HR during eyes open rest (EO) vs. standing rest (S).
# show=True additionally plots the detected R-peaks for each segment.
hr_eo = extract_avg_hr(ecg.loc[ecg['Condition'] == "EO"], show=True)
hr_s = extract_avg_hr(ecg.loc[ecg['Condition'] == "S"], show=True)

### Main Effect (HR Change in Conditions)

In [None]:
# Extract avg. HR for each condition.
# Only the 'ECG' column is handed to extract_avg_hr: applying a function to groups
# that still contain the grouping column is deprecated in recent pandas versions.
avg_hrs = (
    ecg.groupby('Condition')[['ECG']]
    .apply(extract_avg_hr)
    .reset_index(level=-1, drop=True)  # drop the inner row index of each 1-row result
    .reset_index()                     # turn 'Condition' back into a column
)

# Plot distribution as bar plots
fig = px.bar(
    avg_hrs,
    x='Condition',
    y='Avg_HR',
    labels={'Avg_HR':'Avg. Heart Rate (HR)'},
)
fig.show()

## Excursus: Effectiveness of Different Signal Processing Algorithms

In [None]:
# Use the standing rest (S) segment as test data
test_set = ecg.loc[ecg['Condition'] == "S"]
ecg_signal = test_set['ECG']

# Output column -> name of the NeuroKit2 cleaning method that produces it
cleaning_methods = {
    "ECG_NeuroKit": "neurokit",
    "ECG_BioSPPy": "biosppy",
    "ECG_PanTompkins": "pantompkins1985",
    "ECG_Hamilton": "hamilton2002",
    "ECG_Elgendi": "elgendi2010",
    "ECG_EngZeeMod": "engzeemod2012",
}

# Raw signal next to the result of every cleaning pipeline
signals = pd.DataFrame({
    "ECG_Raw": ecg_signal,
    **{
        column: nk.ecg_clean(ecg_signal, sampling_rate=125, method=method)
        for column, method in cleaning_methods.items()
    },
})

# Long format (sample index, pipeline, value) so that each pipeline gets its own plot row
signals = (
    signals.reset_index(drop=True)
    .reset_index()
    .melt(id_vars='index', var_name='Pipeline', value_name='mV')
)
fig = px.line(signals, x='index', y='mV', facet_row="Pipeline")
fig.show()

## EEG Data Processing

In [None]:
# EEG analysis works on everything except the ECG channel
eeg = biosignals.drop(columns='ECG')

# Further pre-processing: FIR band-pass (1-24 Hz) on every channel, i.e. on all
# columns except the last two ('TS_UNIX', 'Condition'); data sampled at 125 Hz
eeg.iloc[:, :-2] = eeg.iloc[:, :-2].apply(
    lambda channel: filter_data(
        np.array(channel), sfreq=125, l_freq=1, h_freq=24,
        method='fir', copy=True, verbose='WARNING',
    )
)

### Plausibility Check (Berger Effect)

In [None]:
# Helper for comparing spectral power between conditions, e.g. alpha power in
# eyes open rest (EO) vs. eyes closed rest (EC) at occipital electrode(s)
def get_psd(eeg_df, condition, elecs, fs=125, fmin=1, fmax=24):
    """Compute the Welch power spectrum of one condition, averaged over electrodes.

    Args:
        eeg_df: DataFrame with one column per electrode and a 'Condition' column.
        condition: Value of 'Condition' that selects the samples to analyse.
        elecs: List of electrode (column) names to include.
        fs: Sampling rate in Hz.
        fmin: Lower edge of the returned frequency range in Hz.
        fmax: Upper edge of the returned frequency range in Hz.

    Returns:
        DataFrame with the columns 'Frequency', 'Power' (in dB, i.e.
        10 * log10 of the electrode-averaged PSD) and 'Condition'.
    """
    # Subset the data
    subset = eeg_df.loc[eeg_df['Condition'] == condition, elecs]

    # Welch segments of 3 s with 2 s overlap (375 / 250 samples at 125 Hz)
    n_per_seg = int(3 * fs)
    n_overlap = int(2 * fs)

    # Pass n_fft explicitly. NOTE(review): psd_array_welch defaults to n_fft=256 and,
    # as far as I can tell, shortens n_per_seg to n_fft when it is larger, so the
    # requested segment length would otherwise not take effect - please confirm
    # against the installed MNE version.
    psds, freqs = mne.time_frequency.psd_array_welch(
        np.array(subset).transpose(),  # MNE expects (channels, samples)
        sfreq=fs, fmin=fmin, fmax=fmax,
        n_fft=n_per_seg, n_per_seg=n_per_seg, n_overlap=n_overlap,
        verbose='WARNING',
    )

    # Average over electrodes, then convert to dB
    psds = psds.mean(0)
    psds = 10 * np.log10(psds)

    # Turn into DF & Return
    return pd.DataFrame({'Frequency': freqs, 'Power': psds, 'Condition': condition})
    
# Power spectra at the occipital electrode Oz for eyes open (EO) and eyes closed (EC)
psd_eo = get_psd(eeg, 'EO', ['Oz'])
psd_ec = get_psd(eeg, 'EC', ['Oz'])

# Overlay both spectra, one line per condition
fig = px.line(
    pd.concat([psd_eo, psd_ec]),
    x='Frequency',
    y='Power',
    color='Condition',
    labels={'Power':'db Power'},
)
fig.show()

### Main Effect (FAA Change in Conditions)

- FAA = Frontal Alpha Asymmetry
- FAA = Right Hemisphere Alpha Power – Left Hemisphere Alpha Power
- Higher FAA -> Greater Left Frontal Activity (Higher Approach Motivation), because alpha power is inversely related to cortical activity

In [None]:
# Re-reference the EEG: express every channel relative to the reference electrode(s)
eeg_channels = eeg.drop(columns=['TS_UNIX', 'Condition'])
ch_names = eeg_channels.columns.tolist()
linked_ref_ch = ['Oz']  # Reference electrode(s); here the single occipital channel Oz
ch_names_after = [ch for ch in ch_names if ch not in linked_ref_ch]

# Wrap the samples in an MNE raw object (channels x samples, 125 Hz) to set the new reference
info = mne.create_info(ch_names, 125, ch_types='eeg')
raw = mne.io.RawArray(np.array(eeg_channels).transpose(), info, verbose='WARNING')
raw.set_eeg_reference(ref_channels=linked_ref_ch)
raw = raw.drop_channels(linked_ref_ch)  # the reference itself is no longer needed

# Back to a DataFrame, with timestamps and condition labels re-attached by row position
reref_eeg = pd.DataFrame(raw.get_data().transpose(), columns=ch_names_after)
for column in ('TS_UNIX', 'Condition'):
    reref_eeg[column] = eeg[column].reset_index(drop=True)
eeg = reref_eeg

In [None]:
# Compute the band power for one region of interest (ROI) from the power
# spectral density (PSD) of a single condition
def extract_band_power(eeg_df, elecs, roi_name, freq_band_dict=None):
    """Integrate the PSD of one condition over one or more frequency bands.

    Args:
        eeg_df: EEG samples of a single condition; needs a 'Condition' column
            and one column per electrode in `elecs`.
        elecs: Electrodes that form the region of interest.
        roi_name: Label stored under the key 'ROI' of the result.
        freq_band_dict: Mapping of band name -> [low, high] in Hz. Defaults to
            {'alpha': [7, 13]}. Bands may overlap or leave gaps.

    Returns:
        Series with one entry per band name (integrated power, NaN when no
        frequency bin falls into the band) plus the entry 'ROI'.
    """
    if freq_band_dict is None:
        freq_band_dict = {'alpha': [7, 13]}

    # All rows belong to one condition, so the first row identifies it
    condition = eeg_df['Condition'].iloc[0]
    psd_df = get_psd(eeg_df, condition, elecs)

    freqs = psd_df['Frequency']
    powers = {}
    for band, (f_low, f_high) in freq_band_dict.items():
        # Same interval convention as before: low edge excluded, high edge included
        in_band = (freqs > f_low) & (freqs <= f_high)
        band_power = psd_df.loc[in_band, 'Power'].to_numpy()
        # Simpson integration with unit spacing between bins (as in the original)
        powers[band] = simpson(band_power) if band_power.size else np.nan
    powers['ROI'] = roi_name
    return pd.Series(powers, name='Power')

def _band_power_per_condition(eeg_df, elecs, roi_name):
    """Return one row of band powers per condition for the given electrodes."""
    # Iterate over the groups explicitly instead of using groupby().apply():
    # extract_band_power needs the 'Condition' column inside each group, and
    # passing the grouping column to apply() is deprecated in recent pandas.
    rows = [
        extract_band_power(group, elecs=elecs, roi_name=roi_name).rename(condition)
        for condition, group in eeg_df.groupby('Condition')
    ]
    return pd.DataFrame(rows).rename_axis('Condition').reset_index()


# Alpha power per condition in the left and right frontal ROI
alpha_left = _band_power_per_condition(eeg.drop('TS_UNIX', axis=1), ['F3', 'F7'], 'Left')
alpha_right = _band_power_per_condition(eeg.drop('TS_UNIX', axis=1), ['F4', 'F8'], 'Right')

# Calculate FAA (right minus left alpha power); cast to float because the
# band-power rows also carry the ROI label and therefore arrive as objects
alpha_frontal = (
    pd.concat([alpha_left, alpha_right])
    .pivot(index='Condition', columns='ROI', values='alpha')
    .astype(float)
)
alpha_frontal['FAA'] = alpha_frontal['Right'] - alpha_frontal['Left']
alpha_frontal.reset_index(inplace=True)

# Plot distribution as bar plots
fig = px.bar(
    alpha_frontal,
    x='Condition',
    y='FAA',
    labels={'FAA':'Frontal Alpha Asymmetry (FAA)'},
)
fig.show()