In [7]:
import scipy.io as sio
import numpy as np
from sklearn.preprocessing import StandardScaler
from scipy.signal import butter, filtfilt, iirnotch
import pandas as pd

In [3]:
# preprocess data
def butterworth_filter(data, order, cutoff_freq, fs, axis=1):
    """Apply a zero-phase low-pass Butterworth filter along one axis.

    The filter is designed with ``scipy.signal.butter`` and applied with
    ``scipy.signal.filtfilt`` (forward and backward pass).

    Parameters
    ----------
    data : array_like
        Signal array to filter.
    order : int
        Order of the Butterworth design.
    cutoff_freq : float
        Low-pass cutoff frequency, in the same units as ``fs``.
    fs : float
        Sampling frequency of ``data``.
    axis : int, optional
        Axis of ``data`` the filter runs along; it must be the time axis.
        Defaults to 1, the axis this function has always used.

    Returns
    -------
    numpy.ndarray
        Filtered signal with the same shape as ``data``.

    Raises
    ------
    ValueError
        Raised by ``filtfilt`` when ``data`` is too short along ``axis``
        for its default edge padding.
    """
    # butter() expects the cutoff as a fraction of the Nyquist frequency.
    nyquist_freq = 0.5 * fs
    normal_cutoff = cutoff_freq / nyquist_freq
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    return filtfilt(b, a, data, axis=axis)

def notch_filter(data, notch_freq, q_factor, fs, axis=1):
    """Remove a narrow frequency band with a zero-phase IIR notch filter.

    The filter is designed with ``scipy.signal.iirnotch`` and applied with
    ``scipy.signal.filtfilt`` (forward and backward pass).

    Parameters
    ----------
    data : array_like
        Signal array to filter.
    notch_freq : float
        Frequency to remove, in the same units as ``fs``.
    q_factor : float
        Quality factor of the notch; larger values give a narrower notch.
    fs : float
        Sampling frequency of ``data``.
    axis : int, optional
        Axis of ``data`` the filter runs along; it must be the time axis.
        Defaults to 1, the axis this function has always used.

    Returns
    -------
    numpy.ndarray
        Filtered signal with the same shape as ``data``.
    """
    b, a = iirnotch(notch_freq, q_factor, fs=fs)
    return filtfilt(b, a, data, axis=axis)

def preprocess_data(eeg_data, time_axis=0):
    """Notch-filter and then low-pass-filter a recording along its time axis.

    A 50 Hz notch (Q = 30) is applied first, followed by a 6th-order
    Butterworth low-pass with an 80 Hz cutoff. Both assume a sampling
    frequency of 256 Hz.

    Parameters
    ----------
    eeg_data : array_like
        Recording with at least two dimensions. The rest of this notebook
        passes ``(n_samples, n_channels)`` blocks: ``split_into_epochs``
        cuts 1024-sample epochs off axis 0.
    time_axis : int, optional
        Axis of ``eeg_data`` that indexes samples over time. Defaults to 0
        to match the ``(n_samples, n_channels)`` layout. Pass 1 for
        ``(n_channels, n_samples)`` data.

    Returns
    -------
    numpy.ndarray
        Filtered recording with the same shape as ``eeg_data``.
    """
    fs = 256  # Sampling frequency
    order = 6  # Filter order
    cutoff_freq = 80  # Cutoff frequency
    notch_freq = 50  # Notch frequency
    q_factor = 30  # Quality factor

    # The filter helpers run along axis 1. Filtering an
    # (n_samples, n_channels) block as-is would smooth across channels
    # instead of across time, so move the time axis to position 1 first.
    time_on_axis_1 = np.swapaxes(np.asarray(eeg_data), time_axis, 1)
    notch_filtered_data = notch_filter(time_on_axis_1, notch_freq, q_factor, fs)
    filtered_data = butterworth_filter(notch_filtered_data, order, cutoff_freq, fs)

    # Restore the caller's layout and hand back a plain contiguous array.
    return np.ascontiguousarray(np.swapaxes(filtered_data, 1, time_axis))

In [4]:
volunteers = ["1", "2", "3", "4", "5", "6"]
labels = ["A", "B"]  # file-name suffixes: A = after coffee, B = before coffee

data_labels = []

# Names of the stimulus entries that are collected from each recording file.
STIMULI_NAMES = ("sig1Hz", "sig5Hz", "sig10Hz", "sig15Hz", "sig20Hz", "sig25Hz", "sig30Hz")


def load_condition(label):
    """Load and preprocess every volunteer's recording for one condition.

    Reads ``<volunteer>_<label>.mat`` for each entry of ``volunteers`` and
    takes the ``"ssvep"`` variable from it. Each item of that variable
    carries the signal at index 0 and the stimulus name at index 1.

    Parameters
    ----------
    label : str
        File-name suffix of the condition: ``"B"`` (before coffee) or
        ``"A"`` (after coffee).

    Returns
    -------
    dict
        Maps each name in ``STIMULI_NAMES`` to a list of preprocessed
        arrays, one per matching entry, in volunteer order.
    """
    recordings = {name: [] for name in STIMULI_NAMES}

    for volunteer in volunteers:
        file_name = volunteer + "_" + label + ".mat"
        mat = sio.loadmat(file_name)["ssvep"]

        for stimuli in mat:
            stimuli_name = stimuli[1][0]

            # Drop the last column before filtering.
            # NOTE(review): presumably a non-EEG column (e.g. a trigger) - confirm.
            stimuli_data = np.delete(np.array(stimuli[0]), -1, axis=1)

            # preprocess data
            stimuli_data = preprocess_data(stimuli_data)

            # Entries whose name is not in STIMULI_NAMES are skipped.
            for name in STIMULI_NAMES:
                if stimuli_name == name:
                    recordings[name].append(stimuli_data)
                    break

    return recordings


# before coffee
recordings_before = load_condition("B")
# after coffee
recordings_after = load_condition("A")

sig_1hz_before_coffe = recordings_before["sig1Hz"]
sig_1hz_after_coffe = recordings_after["sig1Hz"]
sig_5hz_before_coffe = recordings_before["sig5Hz"]
sig_5hz_after_coffe = recordings_after["sig5Hz"]
sig_10hz_before_coffe = recordings_before["sig10Hz"]
sig_10hz_after_coffe = recordings_after["sig10Hz"]
sig_15hz_before_coffe = recordings_before["sig15Hz"]
sig_15hz_after_coffe = recordings_after["sig15Hz"]
sig_20hz_before_coffe = recordings_before["sig20Hz"]
sig_20hz_after_coffe = recordings_after["sig20Hz"]
sig_25hz_before_coffe = recordings_before["sig25Hz"]
sig_25hz_after_coffe = recordings_after["sig25Hz"]
sig_30hz_before_coffe = recordings_before["sig30Hz"]
sig_30hz_after_coffe = recordings_after["sig30Hz"]


# split into epochs of 4 seconds (1024 samples)
# sig_1hz_before_coffe.shape = (6, 5120, 22)

# this function receives a (5120, 22) matrix and returns a (5, 1024, 22) matrix (5 epochs of 4 seconds)
def split_into_epochs(data, epoch_len=1024, n_epochs=5):
    """Cut a recording into consecutive, non-overlapping epochs along axis 0.

    Parameters
    ----------
    data : array_like
        Recording whose first axis indexes samples, e.g. ``(5120, 22)``.
    epoch_len : int, optional
        Number of samples per epoch. Defaults to 1024.
    n_epochs : int, optional
        Number of epochs to extract. Defaults to 5.

    Returns
    -------
    numpy.ndarray
        New array of shape ``(n_epochs, epoch_len, ...)``. Samples beyond
        ``n_epochs * epoch_len`` are discarded.

    Raises
    ------
    ValueError
        If ``data`` has fewer than ``n_epochs * epoch_len`` samples, which
        would otherwise yield epochs of unequal length.
    """
    data = np.asarray(data)
    required = epoch_len * n_epochs
    if data.shape[0] < required:
        raise ValueError(
            "need at least %d samples for %d epochs of %d samples, got %d"
            % (required, n_epochs, epoch_len, data.shape[0])
        )
    return np.stack(
        [data[i * epoch_len:(i + 1) * epoch_len] for i in range(n_epochs)]
    )

def _epoch_recordings(recordings):
    """Stack per-volunteer recordings and split each one into epochs.

    ``recordings`` is first converted to a single array, then every item
    along its first axis is passed through ``split_into_epochs`` and the
    results are stacked again.
    """
    stacked = np.array(recordings)
    return np.array([split_into_epochs(recording) for recording in stacked])


# before coffee
sig_1hz_before_coffe = _epoch_recordings(sig_1hz_before_coffe)
sig_5hz_before_coffe = _epoch_recordings(sig_5hz_before_coffe)
sig_10hz_before_coffe = _epoch_recordings(sig_10hz_before_coffe)
sig_15hz_before_coffe = _epoch_recordings(sig_15hz_before_coffe)
sig_20hz_before_coffe = _epoch_recordings(sig_20hz_before_coffe)
sig_25hz_before_coffe = _epoch_recordings(sig_25hz_before_coffe)
sig_30hz_before_coffe = _epoch_recordings(sig_30hz_before_coffe)

# after coffee
sig_1hz_after_coffe = _epoch_recordings(sig_1hz_after_coffe)
sig_5hz_after_coffe = _epoch_recordings(sig_5hz_after_coffe)
sig_10hz_after_coffe = _epoch_recordings(sig_10hz_after_coffe)
sig_15hz_after_coffe = _epoch_recordings(sig_15hz_after_coffe)
sig_20hz_after_coffe = _epoch_recordings(sig_20hz_after_coffe)
sig_25hz_after_coffe = _epoch_recordings(sig_25hz_after_coffe)
sig_30hz_after_coffe = _epoch_recordings(sig_30hz_after_coffe)

In [5]:
def mean(x):
    """Return the mean over all elements of ``x`` as a NumPy array."""
    average = np.mean(x)
    return np.array(average)

def std(x):
    """Return the standard deviation over all elements of ``x`` as a NumPy array."""
    spread = np.std(x)
    return np.array(spread)

def var(x):
    """Return the variance over all elements of ``x`` as a NumPy array."""
    variance = np.var(x)
    return np.array(variance)

def concatenate_features(x):
    """Return the mean, standard deviation and variance of ``x`` as one 1-D array.

    The values appear in that order, matching the ``mean_ch_*``,
    ``std_ch_*`` and ``var_ch_*`` column order used for the DataFrame.
    """
    statistics = [extract(x) for extract in (mean, std, var)]
    return np.hstack(statistics)

In [8]:
# Metadata columns first, then mean/std/var for each of the 22 channels.
labels = ["epoch", "volunteer", "stimuli_freq", "caffeine_status"]
labels += [
    stat + "_ch_" + str(i)
    for i in range(22)
    for stat in ("mean", "std", "var")
]

# Every feature column is a float; epoch, volunteer and stimuli_freq are
# integers and caffeine_status is a string.
column_types = {name: float for name in labels[4:]}
column_types.update(epoch=int, volunteer=int, stimuli_freq=int, caffeine_status=str)

df = pd.DataFrame(columns=labels).astype(column_types)


# feature extraction: append one row per (volunteer, epoch) to the dataframe
def process_data(data, stimuli_freq, caffeine_status, df):
    """Append one feature row per (volunteer, epoch) of ``data`` to ``df``.

    Parameters
    ----------
    data : iterable
        Outer items are volunteers, inner items are epochs. Each epoch is
        transposed and then iterated, so one signal is taken per column of
        the epoch.
    stimuli_freq : int
        Value written to the ``stimuli_freq`` column of every row.
    caffeine_status : str
        Value written to the ``caffeine_status`` column of every row.
    df : pandas.DataFrame
        Frame that is extended in place; nothing is returned.

    Notes
    -----
    The ``volunteer`` and ``epoch`` values are zero-based positions within
    ``data``, not identifiers read from file names.
    """
    for volunteer_idx, volunteer_epochs in enumerate(data):
        for epoch_idx, epoch in enumerate(volunteer_epochs):
            row = [epoch_idx, volunteer_idx, stimuli_freq, caffeine_status]

            # Iterating the transposed epoch yields one signal per column.
            for channel_signal in epoch.T:
                row.extend(concatenate_features(channel_signal))

            # Write the row at the next free integer index label.
            df.loc[len(df)] = row

# Stimulation frequencies, in the order their rows are written.
stimuli_freqs = (1, 5, 10, 15, 20, 25, 30)

# before coffee
epochs_before = (
    sig_1hz_before_coffe,
    sig_5hz_before_coffe,
    sig_10hz_before_coffe,
    sig_15hz_before_coffe,
    sig_20hz_before_coffe,
    sig_25hz_before_coffe,
    sig_30hz_before_coffe,
)
for stimuli_freq, epochs in zip(stimuli_freqs, epochs_before):
    process_data(epochs, stimuli_freq, "before", df)

# after coffee
epochs_after = (
    sig_1hz_after_coffe,
    sig_5hz_after_coffe,
    sig_10hz_after_coffe,
    sig_15hz_after_coffe,
    sig_20hz_after_coffe,
    sig_25hz_after_coffe,
    sig_30hz_after_coffe,
)
for stimuli_freq, epochs in zip(stimuli_freqs, epochs_after):
    process_data(epochs, stimuli_freq, "after", df)

# save dataframe to csv
df.to_csv("data.csv", index=False)