# Section II: Pre-processign & hybrid deep learning structure

# A. Pre-processing

1. take out pre-trial signals from all C channels and cut it into N segmentations with length L --> N (C*L matrixes)
2. element-wise addition (matrixes) & mean value --> BaseMean (C*L matrixes) --> basic emotion state
3. RawEEG: segment raw EEG into M (C*L matrixes)
4. BaseRemoved = RawEEG - BaseMean
5. concatenate all BaseRermoved matrixes into a big matrix (size the same with raw EEG)

DEAP dataset: n=32; h=9 (vertical); w=9 (horizontal)

# B. Converting 1D EEG into 2D EEG frames

In [1]:
import scipy.io as sio
import argparse
import os
import sys
import numpy as np
import pandas as pd
import time
import pickle

np.random.seed(0)

def data_1Dto2D(data, Y=9, X=9):
    data_2D = np.zeros([Y, X])
    data_2D[0] = (0,        0,          0,          data[0],    0,          data[16],   0,          0,          0       )
    data_2D[1] = (0,        0,          0,          data[1],    0,          data[17],   0,          0,          0       )
    data_2D[2] = (data[3],  0,          data[2],    0,          data[18],   0,          data[19],   0,          data[20])
    data_2D[3] = (0,        data[4],    0,          data[5],    0,          data[22],   0,          data[21],   0       )
    data_2D[4] = (data[7],  0,          data[6],    0,          data[23],   0,          data[24],   0,          data[25])
    data_2D[5] = (0,        data[8],    0,          data[9],    0,          data[27],   0,          data[26],   0       )
    data_2D[6] = (data[11], 0,          data[10],   0,          data[15],   0,          data[28],   0,          data[29])
    data_2D[7] = (0,        0,          0,          data[12],   0,          data[30],   0,          0,          0       )
    data_2D[8] = (0,        0,          0,          data[13],   data[14],   data[31],   0,          0,          0       )
    # return shape:9*9
    return data_2D


In [2]:
# Z-score normalization
# Z = (x - u) / sigma
# x: a non-zero element from a certain position of the frame
# u: the mean of all non-zero elements
# sigma: the stand deviation of these elements

def feature_normalize(data):
    mean = data[data.nonzero()].mean()
    sigma = data[data. nonzero ()].std()
    data_normalized = data
    data_normalized[data_normalized.nonzero()] = (data_normalized[data_normalized.nonzero()] - mean)/sigma
    # return shape: 9*9
    return data_normalized

def norm_dataset(dataset_1D):
    norm_dataset_1D = np.zeros([dataset_1D.shape[0], 32])
    for i in range(dataset_1D.shape[0]):
        norm_dataset_1D[i] = feature_normalize(dataset_1D[i])
    # return shape: m*32
    return norm_dataset_1D

In [3]:
def dataset_1Dto2D(dataset_1D):
    dataset_2D = np.zeros([dataset_1D.shape[0],9,9])
    for i in range(dataset_1D.shape[0]):
        dataset_2D[i] = data_1Dto2D(dataset_1D[i])
    # return shape: m*9*9
    return dataset_2D

def norm_dataset_1Dto2D(dataset_1D):
    norm_dataset_2D = np.zeros([dataset_1D.shape[0], 9, 9])
    for i in range(dataset_1D.shape[0]):
        norm_dataset_2D[i] = feature_normalize( data_1Dto2D(dataset_1D[i]))
    # return shape: m*9*9
    return norm_dataset_2D

In [4]:
def windows(data, size):
    start = 0
    while ((start+size) < data.shape[0]):
        yield int(start), int(start + size)
        start += size

In [5]:
def segment_signal_without_transition(data,label,label_index,window_size):
    # get data file name and label file name
    for (start, end) in windows(data, window_size):
        # print(data.shape)
        if((len(data[start:end]) == window_size)):
            if(start == 0):
                segments = data[start:end]
                segments = np.vstack([segments, data[start:end]])

                labels = np.array(label[label_index])
                labels = np.append(labels, np.array(label[label_index]))
            else:
                segments = np.vstack([segments, data[start:end]])
                labels = np.append(labels, np.array(label[label_index])) # labels = np.append(labels, stats.mode(label[start:end])[0][0])
    return segments, labels

In [6]:
def apply_mixup(dataset_file,window_size,label,yes_or_not): # initial empty label arrays
    print("Processing",dataset_file,"..........")
    data_file_in = sio.loadmat(dataset_file)
    data_in = data_file_in["data"].transpose(0,2,1)
    #0 valence, 1 arousal, 2 dominance, 3 liking
    if label=="arousal":
        label=1
    elif label=="valence":
        label=0
    label_in= data_file_in["labels"][:,label]>5
    label_inter	= np.empty([0]) # initial empty data arrays
    data_inter_cnn	= np.empty([0,window_size, 9, 9])
    data_inter_rnn	= np.empty([0, window_size, 32])
    trials = data_in.shape[0]

    # Data pre-processing
    for trial in range(0,trials):
        if yes_or_not=="yes":
            base_signal = (data_in[trial,0:128,0:32]+data_in[trial,128:256,0:32]+data_in[trial,256:384,0:32])/3
        else:
            base_signal = 0
        data = data_in[trial,384:8064,0:32]
        # compute the deviation between baseline signals and experimental signals
        for i in range(0,60):
            data[i*128:(i+1)*128,0:32]=data[i*128:(i+1)*128,0:32]-base_signal
        label_index = trial
        #read data and label
        data = norm_dataset(data)
        data, label = segment_signal_without_transition(data, label_in,label_index,window_size)
        # cnn data process
        data_cnn    = dataset_1Dto2D(data)
        data_cnn    = data_cnn.reshape ( int(data_cnn.shape[0]/window_size), window_size, 9, 9)
        # rnn data process
        data_rnn    = data. reshape(int(data.shape[0]/window_size), window_size, 32)
        # append new data and label
        data_inter_cnn  = np.vstack([data_inter_cnn, data_cnn])
        data_inter_rnn  = np.vstack([data_inter_rnn, data_rnn])
        label_inter = np.append(label_inter, label)
    '''
    print("total cnn size:", data_inter_cnn.shape)
    print("total rnn size:", data_inter_rnn.shape)
    print("total label size:", label_inter.shape)
    '''
    # shuffle data
    index = np.array(range(0, len(label_inter)))
    np.random.shuffle( index)
    shuffled_data_cnn	= data_inter_cnn[index]
    shuffled_data_rnn	= data_inter_rnn[index]
    shuffled_label 	= label_inter[index]
    return shuffled_data_cnn ,shuffled_data_rnn,shuffled_label,record

In [23]:
begin = time.time()
print("time begin:",time.localtime())
dataset_dir = "/Users/zouhao/Desktop/EEGResearch/Dataset/"
window_size = 128
output_dir = "/Users/zouhao/Desktop/EEGResearch/deap_shuffled_data/"

label_class = 'arousal'     # arousal/valence
suffix = 'yes'     # yes/no (using baseline signals or not)
# get directory name for one subject
record_list = [task for task in os.listdir(dataset_dir) if os.path.isfile(os.path.join(dataset_dir,task))]
output_dir = output_dir+suffix+"_"+label_class+"/"
if os.path.isdir(output_dir)==False:
    os.makedirs(output_dir)
# print(record_list)

for record in record_list:
    file = os.path.join(dataset_dir,record)
    shuffled_cnn_data,shuffled_rnn_data,shuffled_label,record = apply_mixup(file, window_size,label_class,suffix)
    output_data_cnn = output_dir+record+"_win_"+str(window_size)+"_cnn_dataset.pkl"
    output_data_rnn = output_dir+record+"_win_"+str(window_size)+"_rnn_dataset.pkl"
    output_label= output_dir+record+"_win_"+str(window_size)+"_labels.pkl"

    with open(output_data_cnn, "wb") as fp:
        pickle.dump( shuffled_cnn_data,fp, protocol=4)
    with open( output_data_rnn, "wb") as fp:
        pickle.dump(shuffled_rnn_data, fp, protocol=4)
    with open(output_label, "wb") as fp:
        pickle.dump(shuffled_label, fp)
    end = time.time()
    print("end time:",time.localtime())
    print("time consuming:",(end-begin))

time begin: time.struct_time(tm_year=2020, tm_mon=7, tm_mday=21, tm_hour=16, tm_min=41, tm_sec=38, tm_wday=1, tm_yday=203, tm_isdst=1)
Processing /Users/zouhao/Desktop/EEGResearch/Dataset/s01.mat ..........
end time: time.struct_time(tm_year=2020, tm_mon=7, tm_mday=21, tm_hour=16, tm_min=42, tm_sec=9, tm_wday=1, tm_yday=203, tm_isdst=1)
time consuming: 31.070467233657837
Processing /Users/zouhao/Desktop/EEGResearch/Dataset/s15.mat ..........
end time: time.struct_time(tm_year=2020, tm_mon=7, tm_mday=21, tm_hour=16, tm_min=42, tm_sec=39, tm_wday=1, tm_yday=203, tm_isdst=1)
time consuming: 60.94023513793945
Processing /Users/zouhao/Desktop/EEGResearch/Dataset/s29.mat ..........
end time: time.struct_time(tm_year=2020, tm_mon=7, tm_mday=21, tm_hour=16, tm_min=43, tm_sec=12, tm_wday=1, tm_yday=203, tm_isdst=1)
time consuming: 93.29165124893188
Processing /Users/zouhao/Desktop/EEGResearch/Dataset/s28.mat ..........
end time: time.struct_time(tm_year=2020, tm_mon=7, tm_mday=21, tm_hour=16, t

# C. PCNN

## 1. CNN works for mining cross-channel correlation and extracting features from 2D frames

there are 3 continuous 2D convolutional layers with a same kernel size of 4 * 4 for spatial feature extraction

In [None]:
import sys
import sklearn
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
import os
import pandas as pd
import pickle
import tensorflow as tf
import numpy as np
import time
import math
import scipy.io as sio

In [None]:
final_fuse = "concat"

conv_1_shape = '4*4*32'
pool_1_shape = 'None'

conv_2_shape = '4*4*64'
pool_2_shape = 'None'

conv_3_shape = '4*4*128'
pool_3_shape = 'None'

conv_4_shape = '1*1*13'
pool_4_shape = 'None'

window_size = 128
n_lstm_layers = 2

## 2. RNN (LSTM) models the context information for streaming 1D data vectors

## 3. feature fusion method: fuse the extracted features at last for final emotion recognition

In [4]:
import sys
import sklearn
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
import os
import pandas as pd
import pickle
import tensorflow as tf
import numpy as np
import time
import math
import scipy.io as sio

In [5]:
final_fuse = "concat"

conv_1_shape = '4*4*32'
pool_1_shape = 'None'

conv_2_shape = '4*4*64'
pool_2_shape = 'None'

conv_3_shape = '4*4*128'
pool_3_shape = 'None'

conv_4_shape = '1*1*13'
pool_4_shape = 'None'

window_size = 128
n_lstm_layers = 2

In [6]:
# lstm full connected parameter
n_hidden_state = 32
print("\nsize of hidden state", n_hidden_state)
n_fc_out = 1024
n_fc_in = 1024

dropout_prob = 0.5
np.random.seed(32)

norm_type = '2D'
regularization_method = 'dropout'
enable_penalty = True

cnn_suffix        =".mat_win_128_cnn_dataset.pkl"
rnn_suffix        =".mat_win_128_rnn_dataset.pkl"
label_suffix    =".mat_win_128_labels.pkl"


size of hidden state 32


In [7]:
arousal_or_valence = 'arousal'
with_or_without = 'no'
dataset_dir = "/Users/zouhao/Desktop/EEGResearch/Parallel_CRNN/deep_suffled_data/"+with_or_without+"_"+arousal_or_valence+"/"

In [8]:
###load training set
with open(dataset_dir  + cnn_suffix, "rb") as fp:
    cnn_datasets = pickle.load(fp)
with open(dataset_dir  + rnn_suffix, "rb") as fp:
    rnn_datasets = pickle.load(fp)
with open(dataset_dir  + label_suffix, "rb") as fp:
    labels = pickle.load(fp)
    labels = np.transpose(labels)
    print("loaded shape:",labels.shape)
lables_backup = labels
print("cnn_dataset shape before reshape:", np.shape(cnn_datasets))
cnn_datasets = cnn_datasets.reshape(len(cnn_datasets), window_size, 9,9, 1)
print("cnn_dataset shape after reshape:", np.shape(cnn_datasets))
one_hot_labels = np.array(list(pd.get_dummies(labels)))


FileNotFoundError: [Errno 2] No such file or directory: '/Users/zouhao/Desktop/EEGResearch/Parallel_CRNN/deep_suffled_data/no_arousal/.mat_win_128_cnn_dataset.pkl'

In [5]:
import xlrd
import xlwt
import argparse

out_book = xlwt.Workbook(encoding='utf-8', style_compression=0)
out_sheet = out_book.add_sheet('accuracy', cell_overwrite_ok=True)
column_index = 0
persons = 32
fold =10 

In [6]:
def fill_heaer(column_index,model_name,target_class):
    # table header
    out_sheet.write(0,column_index,"model_name:")
    out_sheet.write(0,column_index+1,model_name)
    out_sheet.write(1,column_index,"target_class:")
    out_sheet.write(1,column_index+1,target_class)
    out_sheet.write(2,column_index,"subject")
    out_sheet.write(2,column_index+1,"accuracy")

In [7]:
def fill_cells(dir_path,column_index,model_name,target_class):
    fill_heaer(column_index,model_name,target_class)
    total_accuracy = 0
    for sub in range(1,persons+1):
        subject = "s%02d"%sub
        accuracy = 0
        for count in range(fold):
            input_file = dir_path+target_class+"/"+str(subject)+"_"+str(count)+".xlsx"
            in_book = xlrd.open_workbook(input_file)
            sheet = in_book.sheet_by_name("condition")
            accuracy += sheet.cell_value(1,0)
        accuracy = (accuracy/10)*100
        total_accuracy += accuracy
        print(sub,":",accuracy)
        out_sheet.write(sub + 2,column_index, subject)
        out_sheet.write(sub + 2,column_index+1,accuracy)
    mean_accuracy = total_accuracy/persons
    print("mean accuracy:",mean_accuracy)
    out_sheet.write(sub+3,column_index,"mean:")
    out_sheet.write(sub+3,column_index+1,mean_accuracy)

In [8]:
def fill_cells_origin(dir_path,column_index,model_name,target_class):
    # table header
    fill_heaer(column_index,model_name,target_class)
    total_accuracy = 0
    for sub in range(1,persons+1):
        subject = "s%02d"%sub
        accuracy = 0
        input_file = dir_path+target_class+"/"+str(subject)+".xlsx"
        in_book = xlrd.open_workbook(input_file)
        sheet = in_book.sheet_by_name("condition")
        accuracy += sheet.cell_value(1,0)
        accuracy = (accuracy)*100
        total_accuracy += accuracy
        print(sub,":",accuracy)
        out_sheet.write(sub + 2,column_index, subject)
        out_sheet.write(sub + 2,column_index+1,accuracy)
    mean_accuracy = total_accuracy/persons
    print("mean accuracy:",mean_accuracy)
    out_sheet.write(sub+3,column_index,"mean:")
    out_sheet.write(sub+3,column_index+1,mean_accuracy)