In [1]:
# Created by Joey O'Neill
# College of Charleston
# Classification Comparison of Deep Learning Models on an Imaginary Speech EEG Dataset
# July 2022
# URL: https://www.researchgate.net/publication/361728491_Classification_Comparison_of_Deep_Learning_Models_on_an_Imaginary_Speech_EEG_Dataset

In [4]:
# imports for pipeline
import glob
import math
import numpy as np
import os
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from tempfile import TemporaryFile

In [None]:
# imports for models
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models

### Python Functions For Data Preprocessing Pipeline

In [5]:
# Normalizes Individual Matrices
def feature_normalize(data):
  """Z-score the non-zero entries of ``data`` in place and return it.

  Zero entries are left untouched; only the entries selected by
  ``data.nonzero()`` are replaced by ``(value - mean) / std``, where mean and
  std are computed over those non-zero entries only.

  Args:
    data: NumPy array. It is modified IN PLACE (callers in this notebook
      rely on that), so pass a copy if the original values are still needed.

  Returns:
    The same array object that was passed in.
  """
  nonzero_idx = data.nonzero()
  values = data[nonzero_idx]

  # Nothing to normalize: mean/std of an empty selection would be NaN and
  # emit RuntimeWarnings.
  if values.size == 0:
    return data

  mean = values.mean()
  sigma = values.std()

  # All non-zero entries are identical: dividing by sigma == 0 would fill the
  # array with NaN. The centred value of every such entry is simply 0.
  if sigma == 0:
    data[nonzero_idx] = 0
    return data

  data[nonzero_idx] = (values - mean) / sigma
  return data

In [6]:
# Normalizes entire 1D Dataset
def norm_dataset(dataset_1D):
  """Normalize every row (one time sample) of a 2-D dataset independently.

  Each row is passed through ``feature_normalize`` and the result is stored
  in a new float array with the same shape as the input, so any channel
  count is supported (the width used to be hard-coded to 16).

  Note: ``feature_normalize`` as defined in this notebook writes into the row
  it is given, so ``dataset_1D`` itself is also normalized as a side effect.

  Args:
    dataset_1D: NumPy array indexed as ``[sample, channel]``.

  Returns:
    New float array of the same shape holding the normalized rows.
  """
  norm_dataset_1D = np.zeros(dataset_1D.shape)
  for i in range(dataset_1D.shape[0]):
    norm_dataset_1D[i] = feature_normalize(dataset_1D[i])
  return norm_dataset_1D

In [7]:
# Turns data from 1D to 2D matrix
# Reads 16 channels per sample (indices 0-15)
# FUNCTION MEANT FOR OPENBCI HEADSET LAYOUT
def data_1Dto2D(data, Y=10, X=11):
  """Scatter each 16-channel sample onto a Y-by-X spatial grid.

  Channel ``k`` of every sample is written to the fixed grid cell
  ``(grid_rows[k], grid_cols[k])``; all other cells stay 0.

  Args:
    data: 2-D array-like indexed as ``[sample, channel]`` with at least 16
      channels; only channels 0-15 are used.
    Y: number of grid rows (must be >= 9 to hold the layout).
    X: number of grid columns (must be >= 10 to hold the layout).

  Returns:
    Float array of shape ``(len(data), Y, X)``.

  Raises:
    ValueError: if the grid is too small for the layout or ``data`` does not
      provide 16 channels per sample.
  """
  # Grid position of channels 0..15 (row, column), same layout as before:
  #   row 0: ch0 @ col 4, ch1 @ col 6
  #   row 2: ch8 @ 1, ch10 @ 3, ch11 @ 7, ch9 @ 9
  #   row 4: ch12 @ 1, ch2 @ 3, ch3 @ 7, ch13 @ 9
  #   row 6: ch4 @ 1, ch14 @ 3, ch15 @ 7, ch5 @ 9
  #   row 8: ch6 @ col 4, ch7 @ col 6
  grid_rows = [0, 0, 4, 4, 6, 6, 8, 8, 2, 2, 2, 2, 4, 4, 6, 6]
  grid_cols = [4, 6, 3, 7, 1, 9, 4, 6, 1, 9, 3, 7, 1, 9, 3, 7]

  if Y <= max(grid_rows) or X <= max(grid_cols):
    raise ValueError('Grid of %dx%d is too small for the electrode layout.' % (Y, X))

  # Pre-allocate the whole result once instead of np.append-ing per sample,
  # which re-copied the growing array on every iteration.
  n_samples = len(data)
  full_matrix = np.zeros((n_samples, Y, X))
  if n_samples == 0:
    return full_matrix

  samples = np.asarray(data, dtype=float)
  if samples.ndim != 2 or samples.shape[1] < len(grid_rows):
    raise ValueError('data must be 2-D with at least 16 channels per sample.')

  # One vectorized scatter: column k of `samples` goes to grid cell k.
  full_matrix[:, grid_rows, grid_cols] = samples[:, :len(grid_rows)]

  # return full matrix
  return full_matrix

In [8]:
# Shuffles three arrays in unison
# Used to randomize the cnn, rnn and label arrays together
def unison_shuffle_arrays(arr_1, arr_2, arr_3):
  """Apply one shared random permutation to three equally long arrays.

  Args:
    arr_1, arr_2, arr_3: NumPy arrays with the same length along axis 0.

  Returns:
    Tuple ``(arr_1[rp], arr_2[rp], arr_3[rp])`` where ``rp`` is a single
    random permutation, so rows that were aligned stay aligned.
    If the lengths differ, an error is printed and ``-1`` is returned.
  """
  if not (len(arr_1) == len(arr_2) == len(arr_3)):
    print('ERROR: The lengths of the three arrays are not equal.')
    return -1
  rp = np.random.permutation(len(arr_1))
  return arr_1[rp], arr_2[rp], arr_3[rp]

In [9]:
# Separates individual events into dataset
# For the 30s len recordings
def seperate_events_30s(data):
  """Cut the ten fixed event windows out of one recording.

  Windows are 125 samples long and start at samples 1125, 1375, ..., 3375
  (every 250 samples), i.e. the last window ends at sample 3500.

  Args:
    data: sequence/array indexed by sample along axis 0 (any trailing
      shape), with at least 3500 samples.

  Returns:
    1-D float array holding the ten windows flattened and laid end to end
    (same layout the previous ``np.append`` chain produced); callers reshape
    it to ``(10, 125, ...)``.

  Raises:
    ValueError: if the recording is shorter than 3500 samples. Slicing would
      otherwise silently return short windows and the caller's reshape would
      fail with a less helpful message.
  """
  first_start = 1125
  window_len = 125
  stride = 250
  n_events = 10

  required_len = first_start + (n_events - 1) * stride + window_len
  if len(data) < required_len:
    raise ValueError('Recording has %d samples; at least %d are needed to '
                     'extract %d events.' % (len(data), required_len, n_events))

  # Flatten each window in C order, exactly as np.append (axis=None) did.
  windows = [np.ravel(data[start:start + window_len])
             for start in range(first_start, first_start + n_events * stride, stride)]

  # The leading empty float64 array keeps the dtype promotion of the
  # original implementation (which appended onto a float64 array).
  return np.concatenate([np.empty(0)] + windows)

In [10]:
# MAIN DRIVER FUNCTION: Saves to event array
# file_list: list of files to be iterated through
# events_per_file: number of events cut from each file (the 30s separation
#                  used below always yields 10)
# label: class label attached to every event (0 for /x/, 1 for /y/)
def save_events(file_list, events_per_file, label):
  """Load recordings, normalize them and cut them into labelled events.

  For every CSV in ``file_list`` the 16 EXG channels are read, each sample is
  normalized, and the fixed event windows are extracted twice: once from the
  flat 16-channel samples (RNN input) and once from the 10x11 spatial maps
  (CNN input).

  Args:
    file_list: iterable of CSV paths readable by ``pd.read_csv``.
    events_per_file: number of events each file contributes; it must match
      what ``seperate_events_30s`` extracts, otherwise the final reshape
      raises.
    label: label stored once per extracted event.

  Returns:
    ``[cnn_event_array, rnn_event_array, event_labels]`` with shapes
    ``(total_events, 125, 10, 11)``, ``(total_events, 125, 16)`` and a list
    of ``total_events`` labels.
  """
  n_channels = 16

  # Column names after whitespace stripping. The export header pads names
  # with a leading space; stripping avoids a KeyError when a single name is
  # typed with/without that space (as happened for channel 5).
  channel_columns = ["EXG Channel " + str(ch) for ch in range(n_channels)]

  # Per-file flat chunks, concatenated once after the loop
  cnn_chunks = []
  rnn_chunks = []
  total_events = 0

  for i, file in enumerate(file_list):

    # reads csv to data frame
    df = pd.read_csv(file)
    df = df.rename(columns=lambda name: str(name).strip())

    # separate channel data and turn it into a np array
    channel_data = df[channel_columns].to_numpy()

    # normalize channel data; use the returned array explicitly instead of
    # relying on norm_dataset also normalizing channel_data in place
    rnn_data = norm_dataset(channel_data)

    # separation of events for the rnn (flat 16-channel samples)
    rnn_chunks.append(np.ravel(seperate_events_30s(rnn_data)))

    # transform to spatial matrices, then separate events for the cnn
    cnn_chunks.append(np.ravel(seperate_events_30s(data_1Dto2D(rnn_data))))

    # add events to total events for future reshaping
    total_events = total_events + events_per_file

    print('File ' + str(i) + ' complete.')

  # reshape event arrays (leading empty array keeps an empty file_list valid)
  rnn_event_array = np.concatenate([np.empty(0)] + rnn_chunks).reshape(
      total_events, 125, n_channels)
  cnn_event_array = np.concatenate([np.empty(0)] + cnn_chunks).reshape(
      total_events, 125, 10, 11)

  # one label per event; any label value is accepted, so the label list can
  # never silently end up shorter than the event arrays
  event_labels = [label] * total_events

  print('labels array complete.')

  # return value for function
  return [cnn_event_array, rnn_event_array, event_labels]

### Python Code for Reading the EEG Data into the python workspace

In [None]:
# Directories of EEG data
# These are glob patterns (expanded with glob.glob further down): the trailing
# "\*\*" matches every entry one folder level below x_files / y_files.
# Replace DIRECTORY-GOES-HERE with the local location of the recordings.
x_dir = r"C:\DIRECTORY-GOES-HERE\Recordings\x_files\*\*"
y_dir = r"C:\DIRECTORY-GOES-HERE\Recordings\y_files\*\*"

In [None]:
# Start with one empty file list per class of recording
x_files, y_files = [], []

In [None]:
# Collect the recording files for each class.
# The lists are assigned (not appended to) so re-running this cell cannot
# duplicate files, and sorted because glob returns matches in arbitrary
# order -- sorting keeps the event order reproducible between runs.
x_files = sorted(glob.glob(x_dir))
y_files = sorted(glob.glob(y_dir))

In [None]:
# Build the event arrays for both classes (SEE save_events FUNCTION)
# Each result is [cnn_events, rnn_events, labels]
#   file_list       -> files of that class
#   events_per_file -> 10
#   label           -> 0 for X, 1 for Y
x_arr = save_events(file_list=x_files, events_per_file=10, label=0)
y_arr = save_events(file_list=y_files, events_per_file=10, label=1)

In [None]:
# total number of events over both classes (element 2 of each result is its label list)
total_arr_len = sum(len(result[2]) for result in (x_arr, y_arr))

In [None]:
# Zero-length starting arrays for the combined cnn events, rnn events and labels
all_cnn_events_arr = np.empty(shape=(0, 125, 10, 11))
all_rnn_events_arr = np.empty(shape=(0, 125, 16))
all_labels_arr = np.empty(shape=0)

In [None]:
# Stack the X and Y cnn event arrays along the event axis.
# Built directly from x_arr / y_arr (instead of appending onto the previous
# value) so re-running this cell cannot duplicate events; axis=0 keeps the
# (events, 125, 10, 11) shape rather than flattening like np.append does.
all_cnn_events_arr = np.concatenate([x_arr[0], y_arr[0]], axis=0)

In [None]:
# Stack the X and Y rnn event arrays along the event axis.
# Built directly from x_arr / y_arr (instead of appending onto the previous
# value) so re-running this cell cannot duplicate events; axis=0 keeps the
# (events, 125, 16) shape rather than flattening like np.append does.
all_rnn_events_arr = np.concatenate([x_arr[1], y_arr[1]], axis=0)

In [None]:
# give both combined arrays their (events, 125, ...) layout
all_cnn_events_arr = np.reshape(all_cnn_events_arr, (total_arr_len, 125, 10, 11))
all_rnn_events_arr = np.reshape(all_rnn_events_arr, (total_arr_len, 125, 16))

In [None]:
# label lists (element 2 of each result) as NumPy arrays
x_arr_labels, y_arr_labels = (np.asarray(result[2]) for result in (x_arr, y_arr))

In [None]:
# Join the X and Y labels in the same order as the event arrays.
# Built directly from the two label arrays (instead of appending onto the
# previous value) so re-running this cell cannot duplicate labels.
# Cast to float64 because appending onto the float empty array produced
# float labels before; the dtype is kept unchanged for downstream cells.
all_labels_arr = np.concatenate([x_arr_labels, y_arr_labels]).astype(np.float64)

In [None]:
# Map the label values to consecutive integer class ids
label_encoder = LabelEncoder()
label_encoder.fit(all_labels_arr)
encoded_labels = np.array(label_encoder.transform(all_labels_arr))

### Train Test Split

In [None]:
# Split the data
# Boolean mask over the events: True -> train (about 75%), False -> test.
# A seeded generator is used so the same train/test membership is produced
# on every run, which keeps the model comparison reproducible.
SPLIT_SEED = 42
split = np.random.default_rng(SPLIT_SEED).random(len(all_cnn_events_arr)) < 0.75

In [None]:
# Train Data: events where the mask is True
train_X_cnn, train_X_rnn, train_y = (
    all_cnn_events_arr[split],
    all_rnn_events_arr[split],
    encoded_labels[split],
)

# Test Data: events where the mask is False
test_X_cnn, test_X_rnn, test_y = (
    all_cnn_events_arr[np.logical_not(split)],
    all_rnn_events_arr[np.logical_not(split)],
    encoded_labels[np.logical_not(split)],
)

In [None]:
# Shuffle each split, keeping cnn events, rnn events and labels aligned.
# Each result is a tuple:
#   [0] cnn_events
#   [1] rnn_events
#   [2] labels
randomized_train = unison_shuffle_arrays(arr_1=train_X_cnn, arr_2=train_X_rnn, arr_3=train_y)
randomized_test = unison_shuffle_arrays(arr_1=test_X_cnn, arr_2=test_X_rnn, arr_3=test_y)

In [None]:
# unpack the shuffled (cnn_events, rnn_events, labels) tuples
train_X_cnn, train_X_rnn, train_y = randomized_train

test_X_cnn, test_X_rnn, test_y = randomized_test

In [None]:
# Append a trailing axis of size 1 to the cnn data for model read in
train_X_cnn = tf.expand_dims(train_X_cnn, axis=-1)
test_X_cnn = tf.expand_dims(test_X_cnn, axis=-1)

### **Model 1: O’Neill et al. 3D-CNN-DNN**

In [11]:
##################################################################
# 3D-CNN-DNN Model build                                         #
##################################################################

# Input: 125 frames of 10x11 spatial maps with a single channel
a1_inputs = tf.keras.Input(shape = (125, 10, 11, 1))

# 3-Layer 3D CNN (32 -> 64 -> 128 filters)
a1_model = layers.Conv3D(32, kernel_size=(3, 3, 3), activation='relu')(a1_inputs)
a1_model = layers.Conv3D(64, (3, 3, 3), activation = 'relu')(a1_model)
a1_model = layers.Conv3D(128, (5, 4, 3), activation = 'relu')(a1_model)

# Flatten to Dense
a1_model = layers.Flatten()(a1_model)

# FC Layers
a1_model = layers.Dense(1024, activation='relu')(a1_model)
a1_model = layers.Dense(1024, activation='relu')(a1_model)
a1_model = layers.Dense(1024, activation='relu')(a1_model)

# Softmax Output
# Two units: the preprocessing pipeline only produces the labels 0 and 1, and
# every other model in this comparison ends in a 2-way softmax (this layer
# previously had 5 units).
a1_outputs = layers.Dense(2, activation='softmax')(a1_model)

# Model Creation
a_3d_cnn = keras.Model(inputs=a1_inputs, outputs=a1_outputs)

### **Model 2: Zhang et al.’s Parallel CRNN**

In [12]:
##################################################################
# ZHANG et al.'s PARALLEL CRNN                                   #
##################################################################

# 3-Layer 2D CNN

# Input: Spatial Matrices (125 frames of 10x11 maps, single channel)
inputs_1 = tf.keras.Input(shape = (125, 10, 11, 1))

# 2D CNN Layers
model_1 = layers.Conv2D(32, (3, 3), activation='relu')(inputs_1)
model_1 = layers.Conv2D(64, (3, 3), activation='relu')(model_1)
model_1 = layers.Conv2D(128, (3, 2), activation='relu')(model_1)

# Flatten CNN
model_1 = layers.Flatten()(model_1)

# CNN FC Layer
model_1 = layers.Dense(1024, activation='relu')(model_1)

##################################################################
# 2-Layer LSTM RNN                                               #
##################################################################

# Input: Voltage-at-time array (125 samples x 16 channels)
inputs_2 = tf.keras.Input(shape = (125, 16))

# LSTM Layer 1 (returns the full sequence so it can feed the second LSTM)
model_2 = layers.LSTM(16, return_sequences = True)(inputs_2)

# LSTM Layer 2
model_2 = layers.LSTM(16)(model_2)

# LSTM FC Layer
# NOTE(review): no activation here, unlike the CNN FC layer -- confirm intended.
model_2 = layers.Dense(1024)(model_2)

##################################################################
# Model Concatenation and output
##################################################################

# concat the RNN and the CNN feature vectors
# A Keras Concatenate layer is used instead of a raw tf.concat op so the
# merge is a regular layer of the functional graph.
model_concat = layers.Concatenate(axis=1)([model_1, model_2])

# SoftMax Output
outputs = layers.Dense(2, activation='softmax')(model_concat)

# Model Creation
parallel_crnn = keras.Model(inputs=[inputs_1, inputs_2], outputs=[outputs])

### **Model 3: Zhang et al.’s Cascade CRNN**

In [13]:
##################################################################
# ZHANG et al.'s Cascade CRNN                                    #
##################################################################

# Model input: 125 frames of 10x11 spatial maps, single channel
crnn_inputs = keras.Input(shape=(125, 10, 11, 1))

# Convolutional stage: three 2D convolutions with 3x3 kernels
x = layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(crnn_inputs)
x = layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(x)
x = layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(x)

# Collapse the convolution output into one feature vector
x = layers.Flatten()(x)

# Dense projection to 1024 features
x = layers.Dense(units=1024, activation='relu')(x)

# The LSTM expects (timesteps, features): wrap the vector as a
# sequence of length 1
x = layers.Reshape(target_shape=(1, x.shape[-1]))(x)

# Recurrent stage: two LSTM layers
# Units changed to 1024 from 64, 16 (original) respectively
x = layers.LSTM(units=1024, return_sequences=True)(x)
x = layers.LSTM(units=1024)(x)

# 2-way softmax classifier
crnn_outputs = layers.Dense(units=2, activation='softmax')(x)

# Assemble the model
cascade_crnn = keras.Model(inputs=crnn_inputs, outputs=crnn_outputs)

### **Model 4: Modified Cecotti & Jha’s 4-Layer 3DCNN**

In [14]:
##################################################################
# (MODIFIED) Cecotti and Jha's 4-Layer 3DCNN                     #
##################################################################

# Model input: 125 frames of 10x11 spatial maps, single channel
c4_inputs = keras.Input(shape=(125, 10, 11, 1))

# Convolutional stage: four 3D convolutions (32 -> 64 -> 128 -> 256 filters)
c4_model = layers.Conv3D(filters=32, kernel_size=(3, 3, 3), activation='relu')(c4_inputs)
c4_model = layers.Conv3D(filters=64, kernel_size=(3, 3, 3), activation='relu')(c4_model)
c4_model = layers.Conv3D(filters=128, kernel_size=(3, 3, 3), activation='relu')(c4_model)
c4_model = layers.Conv3D(filters=256, kernel_size=(3, 2, 3), activation='relu')(c4_model)

# Collapse the convolution output into one feature vector
c4_model = layers.Flatten()(c4_model)

# Fully connected stage (DNN): three dense layers of 1024 units
c4_model = layers.Dense(units=1024, activation='relu')(c4_model)
c4_model = layers.Dense(units=1024, activation='relu')(c4_model)
c4_model = layers.Dense(units=1024, activation='relu')(c4_model)

# 2-way softmax classifier
c4_outputs = layers.Dense(units=2, activation='softmax')(c4_model)

# Assemble the model
c4_3d_cnn = keras.Model(inputs=c4_inputs, outputs=c4_outputs)

### **Model 5: Liu et al.’s CNN-SAE-DNN**

In [15]:
##################################################################
# Liu et al.'s CNN-SAE-DNN                                       #
##################################################################
# Model input
# NOTE(review): unlike the other models, this input has no trailing channel
# axis of size 1 -- confirm this is intended for the data fed to it.
inputs = keras.Input(shape=(125, 10, 11))

# Convolutional stage: two 2D convolutions with 3x3 kernels
model = layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(inputs)
model = layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(model)

# Collapse the convolution output before the SAE stage
model = layers.Flatten()(model)

# SAE stage: 5120 -> 120 -> 5120 units
model = layers.Dense(units=5120, activation='relu')(model)
model = layers.Dense(units=120, activation='relu')(model)
model = layers.Dense(units=5120, activation='relu')(model)

# DNN stage: three dense layers of 1024 units
model = layers.Dense(units=1024, activation='relu')(model)
model = layers.Dense(units=1024, activation='relu')(model)
model = layers.Dense(units=1024, activation='relu')(model)

# 2-way softmax classifier
outputs = layers.Dense(units=2, activation='softmax')(model)

# Assemble the model
liu_model = keras.Model(inputs=inputs, outputs=outputs)